{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Atari 游戏 SeaquestDeterministic-v4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2019-01-01 07:14:03,943 [DEBUG] Loaded backend module://ipykernel.pylab.backend_inline version unknown.\n"
     ]
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "import os\n",
    "import sys\n",
    "import time\n",
    "import itertools\n",
    "import logging\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "from PIL import Image\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Route DEBUG-and-above log records to stdout, prefixed with time and level.\n",
    "logging.basicConfig(\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        level=logging.DEBUG, stream=sys.stdout)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "观测空间 = Box(210, 160, 3)\n",
      "动作空间 = Discrete(18)\n",
      "回合最大步数 = 100000\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[0, 592379725]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Atari task to train on. Other tasks that can be swapped in:\n",
    "#   'BreakoutDeterministic-v4', 'PongDeterministic-v4',\n",
    "#   'SpaceInvadersDeterministic-v4', 'BeamRiderDeterministic-v4'\n",
    "env_spec_id = 'SeaquestDeterministic-v4'\n",
    "env = gym.make(env_spec_id)\n",
    "\n",
    "# Report the spaces and the episode step limit of the environment.\n",
    "print('观测空间 =', env.observation_space)\n",
    "print('动作空间 =', env.action_space)\n",
    "print('回合最大步数 =', env._max_episode_steps)\n",
    "\n",
    "# Seed the environment; as the cell's last expression its result is shown.\n",
    "env.seed(0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 深度 Q 网络智能体\n",
    "经验回放"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    \"\"\"Fixed-capacity ring buffer of transitions for experience replay.\n",
    "\n",
    "    Transitions live in a pandas DataFrame with one row per slot; once\n",
    "    every slot has been written, new transitions overwrite the oldest.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, capacity):\n",
    "        \"\"\"Allocate `capacity` empty slots.\"\"\"\n",
    "        fields = ['observation', 'action', 'reward',\n",
    "                'next_observation', 'done']\n",
    "        self.capacity = capacity\n",
    "        self.i = 0 # slot the next transition is written to\n",
    "        self.count = 0 # number of filled slots, never above capacity\n",
    "        self.memory = pd.DataFrame(index=range(capacity), columns=fields)\n",
    "\n",
    "    def store(self, *args):\n",
    "        \"\"\"Write one transition into the current slot and advance.\"\"\"\n",
    "        slot = self.i\n",
    "        self.memory.loc[slot] = args\n",
    "        self.i = (slot + 1) % self.capacity # wrap around at the end\n",
    "        if self.count < self.capacity:\n",
    "            self.count += 1\n",
    "\n",
    "    def sample(self, size):\n",
    "        \"\"\"Draw `size` stored transitions uniformly, with replacement.\n",
    "\n",
    "        Returns a tuple holding one stacked array per column, in column\n",
    "        order.\n",
    "        \"\"\"\n",
    "        indices = np.random.choice(self.count, size=size)\n",
    "        batch = []\n",
    "        for field in self.memory.columns:\n",
    "            batch.append(np.stack(self.memory.loc[indices, field]))\n",
    "        return tuple(batch)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "智能体"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNAgent:\n",
    "    \"\"\"Deep Q-network agent with experience replay and a target network.\n",
    "\n",
    "    Raw observations are converted to grayscale, resized and stacked into\n",
    "    a state; an evaluation network is fitted on replayed transitions and\n",
    "    its weights are periodically copied into a separate target network\n",
    "    that provides the bootstrap values.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, env, input_shape, learning_rate=0.00025,\n",
    "            load_path=None, gamma=0.99,\n",
    "            replay_memory_size=1000000, batch_size=32,\n",
    "            replay_start_size=0,\n",
    "            epsilon=1., epsilon_decrease_rate=9e-7, min_epsilon=0.1,\n",
    "            random_inital_steps=0,\n",
    "            clip_reward=True, rescale_state=True,\n",
    "            update_freq=1, target_network_update_freq=1):\n",
    "        \"\"\"Build both networks and the replay memory.\n",
    "\n",
    "        Args:\n",
    "            env: environment; only `env.action_space.n` is read.\n",
    "            input_shape: network input shape, (stack, rows, columns).\n",
    "            learning_rate: learning rate of the optimizer.\n",
    "            load_path: optional weights file loaded into the evaluation\n",
    "                network.\n",
    "            gamma: discount factor.\n",
    "            replay_memory_size: capacity of the replay memory.\n",
    "            batch_size: number of transitions sampled per training step.\n",
    "            replay_start_size: number of stored transitions required\n",
    "                before training starts; epsilon is also left unchanged\n",
    "                until this many steps have been taken.\n",
    "            epsilon: initial exploration rate.\n",
    "            epsilon_decrease_rate: amount subtracted from epsilon per step.\n",
    "            min_epsilon: lower bound of epsilon.\n",
    "            random_inital_steps: number of leading steps of an episode on\n",
    "                which `decide` always acts at random.\n",
    "            clip_reward: clip sampled rewards to [-1, 1] when training.\n",
    "            rescale_state: apply `x / 128 - 1` to states before they are\n",
    "                fed to the networks.\n",
    "            update_freq: train once every `update_freq` calls to `learn`.\n",
    "            target_network_update_freq: copy weights to the target\n",
    "                network once every this many training steps.\n",
    "        \"\"\"\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = gamma\n",
    "\n",
    "        # experience replay parameters\n",
    "        self.replay_memory_size = replay_memory_size\n",
    "        self.replay_start_size = replay_start_size\n",
    "        self.batch_size = batch_size\n",
    "        self.replayer = DQNReplayer(replay_memory_size)\n",
    "\n",
    "        # image input parameters; img_shape is (columns, rows) because it\n",
    "        # is passed to PIL's resize\n",
    "        self.img_shape = (input_shape[-1], input_shape[-2])\n",
    "        self.img_stack = input_shape[-3]\n",
    "\n",
    "        # exploration parameters\n",
    "        self.epsilon = epsilon\n",
    "        self.epsilon_decrease_rate = epsilon_decrease_rate\n",
    "        self.min_epsilon = min_epsilon\n",
    "        self.random_inital_steps = random_inital_steps\n",
    "\n",
    "        self.clip_reward = clip_reward\n",
    "        self.rescale_state = rescale_state\n",
    "\n",
    "        self.update_freq = update_freq\n",
    "        self.target_network_update_freq = target_network_update_freq\n",
    "\n",
    "        # evaluation network\n",
    "        self.evaluate_net = self.build_network(\n",
    "                input_shape=input_shape, output_size=self.action_n,\n",
    "                conv_activation=tf.nn.relu,\n",
    "                fc_hidden_sizes=[512,], fc_activation=tf.nn.relu,\n",
    "                learning_rate=learning_rate, load_path=load_path)\n",
    "        self.evaluate_net.summary() # print the network structure\n",
    "        # target network\n",
    "        self.target_net = self.build_network(\n",
    "                input_shape=input_shape, output_size=self.action_n,\n",
    "                conv_activation=tf.nn.relu,\n",
    "                fc_hidden_sizes=[512,], fc_activation=tf.nn.relu,\n",
    "                )\n",
    "        self.update_target_network()\n",
    "\n",
    "        # initialize counters\n",
    "        self.step = 0\n",
    "        self.fit_count = 0\n",
    "\n",
    "\n",
    "    def build_network(self, input_shape, output_size, conv_activation,\n",
    "            fc_hidden_sizes, fc_activation, output_activation=None,\n",
    "            learning_rate=0.001, load_path=None):\n",
    "        \"\"\"Create and compile a convolutional Q-network.\n",
    "\n",
    "        Args:\n",
    "            input_shape: shape of one sample, (channels, rows, columns).\n",
    "            output_size: number of output units.\n",
    "            conv_activation: activation of the three convolutional layers.\n",
    "            fc_hidden_sizes: sizes of the hidden fully connected layers.\n",
    "            fc_activation: activation of the hidden fully connected layers.\n",
    "            output_activation: activation of the output layer.\n",
    "            learning_rate: learning rate of the RMSprop optimizer.\n",
    "            load_path: optional weights file loaded before compiling.\n",
    "\n",
    "        Returns:\n",
    "            The compiled keras model.\n",
    "        \"\"\"\n",
    "        # network input format: (samples, channels, rows, columns)\n",
    "        model = keras.models.Sequential()\n",
    "        # tf expects (samples, rows, columns, channels) instead\n",
    "        model.add(keras.layers.Permute((2, 3, 1), input_shape=input_shape))\n",
    "\n",
    "        # convolutional layers\n",
    "        model.add(keras.layers.Conv2D(32, 8, strides=4,\n",
    "                activation=conv_activation))\n",
    "        model.add(keras.layers.Conv2D(64, 4, strides=2,\n",
    "                activation=conv_activation))\n",
    "        model.add(keras.layers.Conv2D(64, 3, strides=1,\n",
    "                activation=conv_activation))\n",
    "\n",
    "        model.add(keras.layers.Flatten())\n",
    "\n",
    "        # fully connected layers\n",
    "        for hidden_size in fc_hidden_sizes:\n",
    "            model.add(keras.layers.Dense(hidden_size,\n",
    "                    activation=fc_activation))\n",
    "        model.add(keras.layers.Dense(output_size,\n",
    "                activation=output_activation))\n",
    "\n",
    "        if load_path is not None:\n",
    "            logging.info('载入网络权重 {}.'.format(load_path))\n",
    "            model.load_weights(load_path)\n",
    "\n",
    "        # Fall back to the tf1 optimizer when the keras one rejects these\n",
    "        # arguments. `except Exception` rather than a bare `except`, so\n",
    "        # KeyboardInterrupt and SystemExit are not swallowed.\n",
    "        try: # tf2\n",
    "            optimizer = keras.optimizers.RMSprop(learning_rate, 0.95,\n",
    "                    momentum=0.95, epsilon=0.01)\n",
    "        except Exception: # tf1\n",
    "            optimizer = tf.train.RMSPropOptimizer(learning_rate, 0.95,\n",
    "                    momentum=0.95, epsilon=0.01)\n",
    "        model.compile(loss=keras.losses.mse, optimizer=optimizer)\n",
    "        return model\n",
    "\n",
    "    def get_next_state(self, state=None, observation=None):\n",
    "        \"\"\"Append a preprocessed observation to a state.\n",
    "\n",
    "        Args:\n",
    "            state: current stack of frames, or None at the start of an\n",
    "                episode.\n",
    "            observation: frame passed to `Image.fromarray(..., 'RGB')`.\n",
    "\n",
    "        Returns:\n",
    "            Array of stacked grayscale frames: `img_stack` copies of the\n",
    "            new frame when `state` is None, otherwise `state` without its\n",
    "            first frame and with the new frame appended.\n",
    "        \"\"\"\n",
    "        img = Image.fromarray(observation, 'RGB')\n",
    "        img = img.resize(self.img_shape).convert('L') # resize, grayscale\n",
    "        # An 'L' image converts directly to a (rows, columns) uint8 array;\n",
    "        # this avoids the per-pixel sequence built by getdata().\n",
    "        img = np.asarray(img, dtype=np.uint8)\n",
    "\n",
    "        # stack frames\n",
    "        if state is None:\n",
    "            next_state = np.array([img,] * self.img_stack) # initialize\n",
    "        else:\n",
    "            next_state = np.append(state[1:], [img,], axis=0)\n",
    "        return next_state\n",
    "\n",
    "    def decide(self, state, test=False, step=None):\n",
    "        \"\"\"Choose an action epsilon-greedily.\n",
    "\n",
    "        Epsilon is 1 while `step` is below `random_inital_steps`, 0.05\n",
    "        when `test` is true, and `self.epsilon` otherwise.\n",
    "\n",
    "        Args:\n",
    "            state: stacked frames as returned by `get_next_state`.\n",
    "            test: whether the agent is being evaluated.\n",
    "            step: index of the step within the episode, or None.\n",
    "\n",
    "        Returns:\n",
    "            Index of the chosen action.\n",
    "        \"\"\"\n",
    "        if step is not None and step < self.random_inital_steps:\n",
    "            epsilon = 1.\n",
    "        elif test:\n",
    "            epsilon = 0.05\n",
    "        else:\n",
    "            epsilon = self.epsilon\n",
    "        if np.random.rand() < epsilon:\n",
    "            action = np.random.choice(self.action_n)\n",
    "        else:\n",
    "            if self.rescale_state:\n",
    "                state = state / 128. - 1.\n",
    "            q_values = self.evaluate_net.predict(state[np.newaxis])[0]\n",
    "            action = np.argmax(q_values)\n",
    "        return action\n",
    "\n",
    "    def learn(self, state, action, reward, next_state, done):\n",
    "        \"\"\"Store one transition and, when due, run one training step.\n",
    "\n",
    "        A training step happens every `update_freq` calls once the replay\n",
    "        memory holds at least `replay_start_size` transitions. It fits\n",
    "        the evaluation network on a sampled batch whose targets come from\n",
    "        the target network, and refreshes the target network every\n",
    "        `target_network_update_freq` training steps. Epsilon decreases\n",
    "        linearly towards `min_epsilon` once `replay_start_size` steps\n",
    "        have been taken.\n",
    "        \"\"\"\n",
    "        self.replayer.store(state, action, reward, next_state, done)\n",
    "\n",
    "        self.step += 1\n",
    "\n",
    "        if (self.step % self.update_freq == 0 and\n",
    "                self.replayer.count >= self.replay_start_size):\n",
    "            states, actions, rewards, next_states, dones = (\n",
    "                    self.replayer.sample(self.batch_size)) # replay\n",
    "\n",
    "            if self.rescale_state:\n",
    "                states = states / 128. - 1.\n",
    "                next_states = next_states / 128. - 1.\n",
    "            if self.clip_reward:\n",
    "                rewards = np.clip(rewards, -1., 1.)\n",
    "\n",
    "            next_qs = self.target_net.predict(next_states)\n",
    "            next_max_qs = next_qs.max(axis=-1)\n",
    "            # Start from the current predictions so that only the entry of\n",
    "            # the action actually taken contributes to the loss.\n",
    "            targets = self.evaluate_net.predict(states)\n",
    "            targets[range(self.batch_size), actions] = (rewards +\n",
    "                    self.gamma * next_max_qs * (1. - dones))\n",
    "\n",
    "            h = self.evaluate_net.fit(states, targets, verbose=0)\n",
    "            self.fit_count += 1\n",
    "\n",
    "            if self.fit_count % 100 == 0:\n",
    "                # NOTE(review): the second field is labelled as the episode\n",
    "                # but the value logged is the current epsilon.\n",
    "                logging.info('训练 {}, 回合 {}, 存储大小 {}, 损失 {}'\n",
    "                        .format(self.fit_count, self.epsilon,\n",
    "                        self.replayer.count, h.history['loss'][0]))\n",
    "\n",
    "            if self.fit_count % self.target_network_update_freq == 0:\n",
    "                self.update_target_network()\n",
    "\n",
    "        # update epsilon: linear decrease\n",
    "        if self.step >= self.replay_start_size:\n",
    "            self.epsilon = max(self.epsilon - self.epsilon_decrease_rate,\n",
    "                               self.min_epsilon)\n",
    "\n",
    "    def update_target_network(self):\n",
    "        \"\"\"Copy the evaluation network's weights into the target network.\"\"\"\n",
    "        self.target_net.set_weights(self.evaluate_net.get_weights())\n",
    "        logging.info('目标网络已更新')\n",
    "\n",
    "    def save_network(self, path):\n",
    "        \"\"\"Save the evaluation network's weights to `path`.\n",
    "\n",
    "        The parent directory of `path` is created when it does not exist.\n",
    "        \"\"\"\n",
    "        # Derive the directory from the argument, not from a global.\n",
    "        dirname = os.path.dirname(path)\n",
    "        # dirname is '' for a bare file name; nothing to create in that case.\n",
    "        if dirname and not os.path.exists(dirname):\n",
    "            os.makedirs(dirname)\n",
    "            logging.info('创建文件夹 {}'.format(dirname))\n",
    "        self.evaluate_net.save_weights(path)\n",
    "        logging.info('网络权重已保存 {}'.format(path))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def test(env, agent, episodes=100, render=False, verbose=True):\n",
    "    \"\"\"Run evaluation episodes and return the reward of each episode.\n",
    "\n",
    "    Actions come from `agent.decide(..., test=True)`. One INFO line is\n",
    "    logged per episode; step and reward statistics are logged at the end\n",
    "    when `verbose` is true.\n",
    "    \"\"\"\n",
    "    step_counts = []\n",
    "    episode_rewards = []\n",
    "    for episode in range(episodes):\n",
    "        total_reward = 0\n",
    "        state = agent.get_next_state(None, env.reset())\n",
    "        n_steps = 0\n",
    "        done = False\n",
    "        while not done:\n",
    "            if render:\n",
    "                env.render()\n",
    "            action = agent.decide(state, test=True, step=n_steps)\n",
    "            observation, reward, done, _ = env.step(action)\n",
    "            state = agent.get_next_state(state, observation)\n",
    "            total_reward += reward\n",
    "            n_steps += 1\n",
    "        step_counts.append(n_steps)\n",
    "        episode_rewards.append(total_reward)\n",
    "        # the last field is the running total of steps over all episodes\n",
    "        summary = '[测试] 回合 {}: 步骤 {}, 奖励 {}, 步数 {}'.format(\n",
    "                episode, n_steps, total_reward, np.sum(step_counts))\n",
    "        logging.info(summary)\n",
    "\n",
    "    if verbose:\n",
    "        step_stats = (np.mean(step_counts), np.min(step_counts),\n",
    "                np.max(step_counts))\n",
    "        logging.info('[测试小结] 步数: 平均 = {}, 最小 = {}, 最大 = {}.'\n",
    "                .format(*step_stats))\n",
    "        reward_stats = (np.mean(episode_rewards), np.min(episode_rewards),\n",
    "                np.max(episode_rewards))\n",
    "        logging.info('[测试小结] 奖励: 平均 = {}, 最小 = {}, 最大 = {}'\n",
    "                .format(*reward_stats))\n",
    "    return episode_rewards"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "参数设置"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "render = False\n",
    "load_path = None\n",
    "# weights are saved as ./output/<env id>-<timestamp>/model.h5\n",
    "save_path = './output/{}-{}/model.h5'.format(\n",
    "        env.unwrapped.spec.id, time.strftime('%Y%m%d-%H%M%S'))\n",
    "\n",
    "\"\"\"\n",
    "Nature 文章使用的参数, 运行极慢, 请勿轻易尝试\n",
    "\"\"\"\n",
    "# Parameters used in the Nature article: extremely slow to run, do not\n",
    "# try them lightly.\n",
    "# -- network and optimizer\n",
    "input_shape = (4, 110, 84) # network input size\n",
    "learning_rate = 0.00025 # optimizer learning rate\n",
    "gamma = 0.99\n",
    "# -- experience replay and training schedule\n",
    "replay_memory_size = 1000000\n",
    "replay_start_size = 50000 # number of experiences before training starts\n",
    "batch_size = 32\n",
    "update_freq = 4 # interval between network training steps\n",
    "target_network_update_freq = 10000\n",
    "# -- exploration\n",
    "epsilon = 1. # initial exploration rate\n",
    "min_epsilon = 0.1 # final exploration rate\n",
    "epsilon_decrease = 9e-7 # exploration decrease speed\n",
    "random_inital_steps = 30 # random steps at the start of each episode\n",
    "# -- run length and evaluation\n",
    "frames = 50000000 # total number of training steps\n",
    "test_freq = 50000 # number of steps between evaluations\n",
    "test_episodes = 100 # number of episodes per evaluation\n",
    "\n",
    "\n",
    "\"\"\"\n",
    "小规模参数, 运行时间数小时, 有一点点训练效果\n",
    "\"\"\"\n",
    "# Small-scale parameters: run for a few hours, with a little training\n",
    "# effect. They override the values assigned above.\n",
    "replay_memory_size = 50000\n",
    "replay_start_size = 10000\n",
    "batch_size = 32\n",
    "target_network_update_freq = 4000\n",
    "random_inital_steps = 30\n",
    "frames = 100000\n",
    "test_freq = 25000\n",
    "test_episodes = 50\n",
    "\n",
    "\n",
    "# Tiny-scale parameters: finish within minutes, with basically no\n",
    "# training effect. Uncomment to use.\n",
    "# batch_size = 6\n",
    "# replay_memory_size = 5000\n",
    "# target_network_update_freq = 80\n",
    "# replay_start_size = 500\n",
    "# random_inital_steps = 30\n",
    "# frames = 7500\n",
    "# test_freq = 2500\n",
    "# test_episodes = 10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "训练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "permute (Permute)            (None, 110, 84, 4)        0         \n",
      "_________________________________________________________________\n",
      "conv2d (Conv2D)              (None, 26, 20, 32)        8224      \n",
      "_________________________________________________________________\n",
      "conv2d_1 (Conv2D)            (None, 12, 9, 64)         32832     \n",
      "_________________________________________________________________\n",
      "conv2d_2 (Conv2D)            (None, 10, 7, 64)         36928     \n",
      "_________________________________________________________________\n",
      "flatten (Flatten)            (None, 4480)              0         \n",
      "_________________________________________________________________\n",
      "dense (Dense)                (None, 512)               2294272   \n",
      "_________________________________________________________________\n",
      "dense_1 (Dense)              (None, 18)                9234      \n",
      "=================================================================\n",
      "Total params: 2,381,490\n",
      "Trainable params: 2,381,490\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "2019-01-01 07:14:05,774 [INFO] 目标网络已更新\n",
      "2019-01-01 07:14:05,774 [INFO] 训练开始\n",
      "2019-01-01 07:14:41,394 [INFO] 训练 100, 回合 0.9996409000000118, 存储大小 400, 损失 0.000984652666375041\n",
      "2019-01-01 07:14:48,138 [INFO] 回合 0, 步数 490, 奖励 60.0, 总步数 491\n",
      "2019-01-01 07:15:11,088 [INFO] 训练 200, 回合 0.9992809000000237, 存储大小 800, 损失 7.847933011362329e-05\n",
      "2019-01-01 07:15:20,610 [INFO] 回合 1, 步数 441, 奖励 40.0, 总步数 933\n",
      "2019-01-01 07:15:40,014 [INFO] 训练 300, 回合 0.9989209000000355, 存储大小 1200, 损失 0.0018917738925665617\n",
      "2019-01-01 07:15:45,897 [INFO] 回合 2, 步数 351, 奖励 0.0, 总步数 1285\n",
      "2019-01-01 07:16:09,523 [INFO] 训练 400, 回合 0.9985609000000474, 存储大小 1600, 损失 1.497333050792804e-05\n",
      "2019-01-01 07:16:21,607 [INFO] 回合 3, 步数 483, 奖励 60.0, 总步数 1769\n",
      "2019-01-01 07:16:39,051 [INFO] 训练 500, 回合 0.9982009000000592, 存储大小 2000, 损失 1.6737792975618504e-05\n",
      "2019-01-01 07:17:05,193 [INFO] 回合 4, 步数 604, 奖励 120.0, 总步数 2374\n",
      "2019-01-01 07:17:07,575 [INFO] 训练 600, 回合 0.9978409000000711, 存储大小 2400, 损失 1.1924877981073223e-05\n",
      "2019-01-01 07:17:35,822 [INFO] 训练 700, 回合 0.9974809000000829, 存储大小 2800, 损失 1.4357136024045758e-05\n",
      "2019-01-01 07:17:39,659 [INFO] 回合 5, 步数 488, 奖励 100.0, 总步数 2863\n",
      "2019-01-01 07:18:03,907 [INFO] 训练 800, 回合 0.9971209000000948, 存储大小 3200, 损失 2.8091715648770332e-05\n",
      "2019-01-01 07:18:30,766 [INFO] 回合 6, 步数 728, 奖励 200.0, 总步数 3592\n",
      "2019-01-01 07:18:31,933 [INFO] 训练 900, 回合 0.9967609000001066, 存储大小 3600, 损失 1.943439565366134e-05\n",
      "2019-01-01 07:19:00,606 [INFO] 训练 1000, 回合 0.9964009000001185, 存储大小 4000, 损失 0.001696764025837183\n",
      "2019-01-01 07:19:29,721 [INFO] 训练 1100, 回合 0.9960409000001303, 存储大小 4400, 损失 2.7505819161888212e-05\n",
      "2019-01-01 07:19:33,001 [INFO] 回合 7, 步数 860, 奖励 140.0, 总步数 4453\n",
      "2019-01-01 07:19:58,203 [INFO] 训练 1200, 回合 0.9956809000001422, 存储大小 4800, 损失 2.1326763089746237e-05\n",
      "2019-01-01 07:20:03,927 [INFO] 回合 8, 步数 435, 奖励 40.0, 总步数 4889\n",
      "2019-01-01 07:20:26,993 [INFO] 训练 1300, 回合 0.995320900000154, 存储大小 5200, 损失 8.527861791662872e-05\n",
      "2019-01-01 07:20:34,111 [INFO] 回合 9, 步数 421, 奖励 40.0, 总步数 5311\n",
      "2019-01-01 07:20:55,619 [INFO] 训练 1400, 回合 0.9949609000001659, 存储大小 5600, 损失 2.375097574258689e-05\n",
      "2019-01-01 07:21:02,443 [INFO] 回合 10, 步数 397, 奖励 20.0, 总步数 5709\n",
      "2019-01-01 07:21:24,904 [INFO] 训练 1500, 回合 0.9946009000001778, 存储大小 6000, 损失 1.4647604984929785e-05\n",
      "2019-01-01 07:21:41,189 [INFO] 回合 11, 步数 526, 奖励 80.0, 总步数 6236\n",
      "2019-01-01 07:21:54,178 [INFO] 训练 1600, 回合 0.9942409000001896, 存储大小 6400, 损失 0.0017828207928687334\n",
      "2019-01-01 07:22:23,244 [INFO] 训练 1700, 回合 0.9938809000002015, 存储大小 6800, 损失 0.003335279878228903\n",
      "2019-01-01 07:22:27,926 [INFO] 回合 12, 步数 642, 奖励 140.0, 总步数 6879\n",
      "2019-01-01 07:22:52,334 [INFO] 训练 1800, 回合 0.9935209000002133, 存储大小 7200, 损失 1.9077680917689577e-05\n",
      "2019-01-01 07:23:00,938 [INFO] 回合 13, 步数 453, 奖励 60.0, 总步数 7333\n",
      "2019-01-01 07:23:21,783 [INFO] 训练 1900, 回合 0.9931609000002252, 存储大小 7600, 损失 2.0107354430365376e-05\n",
      "2019-01-01 07:23:30,238 [INFO] 回合 14, 步数 399, 奖励 20.0, 总步数 7733\n",
      "2019-01-01 07:23:51,089 [INFO] 训练 2000, 回合 0.992800900000237, 存储大小 8000, 损失 3.0115610570646822e-05\n",
      "2019-01-01 07:23:52,403 [INFO] 回合 15, 步数 301, 奖励 0.0, 总步数 8035\n",
      "2019-01-01 07:24:20,828 [INFO] 训练 2100, 回合 0.9924409000002489, 存储大小 8400, 损失 1.889149280032143e-05\n",
      "2019-01-01 07:24:21,440 [INFO] 回合 16, 步数 392, 奖励 40.0, 总步数 8428\n",
      "2019-01-01 07:24:50,393 [INFO] 训练 2200, 回合 0.9920809000002607, 存储大小 8800, 损失 2.7016916646971367e-05\n",
      "2019-01-01 07:24:52,191 [INFO] 回合 17, 步数 413, 奖励 20.0, 总步数 8842\n",
      "2019-01-01 07:25:20,548 [INFO] 训练 2300, 回合 0.9917209000002726, 存储大小 9200, 损失 1.7635797121329233e-05\n",
      "2019-01-01 07:25:50,976 [INFO] 训练 2400, 回合 0.9913609000002844, 存储大小 9600, 损失 1.1895139323314652e-05\n",
      "2019-01-01 07:25:51,294 [INFO] 回合 18, 步数 781, 奖励 160.0, 总步数 9624\n",
      "2019-01-01 07:26:20,928 [INFO] 训练 2500, 回合 0.9910009000002963, 存储大小 10000, 损失 0.0017201591981574893\n",
      "2019-01-01 07:26:21,636 [INFO] 回合 19, 步数 403, 奖励 40.0, 总步数 10028\n",
      "2019-01-01 07:26:51,144 [INFO] 训练 2600, 回合 0.9906409000003081, 存储大小 10400, 损失 1.6342786693712696e-05\n",
      "2019-01-01 07:27:08,379 [INFO] 回合 20, 步数 619, 奖励 120.0, 总步数 10648\n",
      "2019-01-01 07:27:21,817 [INFO] 训练 2700, 回合 0.99028090000032, 存储大小 10800, 损失 1.470058396080276e-05\n",
      "2019-01-01 07:27:47,892 [INFO] 回合 21, 步数 517, 奖励 60.0, 总步数 11166\n",
      "2019-01-01 07:27:52,210 [INFO] 训练 2800, 回合 0.9899209000003318, 存储大小 11200, 损失 1.355688800686039e-05\n",
      "2019-01-01 07:28:18,180 [INFO] 回合 22, 步数 397, 奖励 0.0, 总步数 11564\n",
      "2019-01-01 07:28:22,707 [INFO] 训练 2900, 回合 0.9895609000003437, 存储大小 11600, 损失 2.0865983969997615e-05\n",
      "2019-01-01 07:28:53,346 [INFO] 训练 3000, 回合 0.9892009000003555, 存储大小 12000, 损失 1.2883449016953819e-05\n",
      "2019-01-01 07:29:16,646 [INFO] 回合 23, 步数 770, 奖励 180.0, 总步数 12335\n",
      "2019-01-01 07:29:23,719 [INFO] 训练 3100, 回合 0.9888409000003674, 存储大小 12400, 损失 1.5765457646921277e-05\n",
      "2019-01-01 07:29:54,467 [INFO] 训练 3200, 回合 0.9884809000003792, 存储大小 12800, 损失 1.9362631064723246e-05\n",
      "2019-01-01 07:29:57,963 [INFO] 回合 24, 步数 536, 奖励 120.0, 总步数 12872\n",
      "2019-01-01 07:30:25,256 [INFO] 训练 3300, 回合 0.9881209000003911, 存储大小 13200, 损失 3.025901605724357e-05\n",
      "2019-01-01 07:30:43,391 [INFO] 回合 25, 步数 592, 奖励 60.0, 总步数 13465\n",
      "2019-01-01 07:30:56,154 [INFO] 训练 3400, 回合 0.987760900000403, 存储大小 13600, 损失 2.1675123207387514e-05\n",
      "2019-01-01 07:31:16,188 [INFO] 回合 26, 步数 419, 奖励 20.0, 总步数 13885\n",
      "2019-01-01 07:31:27,392 [INFO] 训练 3500, 回合 0.9874009000004148, 存储大小 14000, 损失 8.0830417573452e-05\n",
      "2019-01-01 07:31:58,528 [INFO] 训练 3600, 回合 0.9870409000004267, 存储大小 14400, 损失 2.7749789296649396e-05\n",
      "2019-01-01 07:32:23,153 [INFO] 回合 27, 步数 861, 奖励 240.0, 总步数 14747\n",
      "2019-01-01 07:32:29,848 [INFO] 训练 3700, 回合 0.9866809000004385, 存储大小 14800, 损失 2.4572538677603006e-05\n",
      "2019-01-01 07:33:01,225 [INFO] 训练 3800, 回合 0.9863209000004504, 存储大小 15200, 损失 0.0017765102675184608\n",
      "2019-01-01 07:33:08,133 [INFO] 回合 28, 步数 571, 奖励 80.0, 总步数 15319\n",
      "2019-01-01 07:33:32,689 [INFO] 训练 3900, 回合 0.9859609000004622, 存储大小 15600, 损失 1.3385237252805382e-05\n",
      "2019-01-01 07:33:33,761 [INFO] 回合 29, 步数 325, 奖励 0.0, 总步数 15645\n",
      "2019-01-01 07:34:04,311 [INFO] 训练 4000, 回合 0.9856009000004741, 存储大小 16000, 损失 2.4979592126328498e-05\n",
      "2019-01-01 07:34:04,347 [INFO] 目标网络已更新\n",
      "2019-01-01 07:34:25,443 [INFO] 回合 30, 步数 652, 奖励 140.0, 总步数 16298\n",
      "2019-01-01 07:34:36,245 [INFO] 训练 4100, 回合 0.9852409000004859, 存储大小 16400, 损失 1.1246081157878507e-05\n",
      "2019-01-01 07:35:08,069 [INFO] 训练 4200, 回合 0.9848809000004978, 存储大小 16800, 损失 1.2313961633481085e-05\n",
      "2019-01-01 07:35:19,430 [INFO] 回合 31, 步数 680, 奖励 160.0, 总步数 16979\n",
      "2019-01-01 07:35:40,033 [INFO] 训练 4300, 回合 0.9845209000005096, 存储大小 17200, 损失 1.3619142919196747e-05\n",
      "2019-01-01 07:35:59,362 [INFO] 回合 32, 步数 495, 奖励 80.0, 总步数 17475\n",
      "2019-01-01 07:36:12,449 [INFO] 训练 4400, 回合 0.9841609000005215, 存储大小 17600, 损失 1.4222159734345041e-05\n",
      "2019-01-01 07:36:45,244 [INFO] 训练 4500, 回合 0.9838009000005333, 存储大小 18000, 损失 2.2048956452636048e-05\n",
      "2019-01-01 07:37:03,870 [INFO] 回合 33, 步数 785, 奖励 180.0, 总步数 18261\n",
      "2019-01-01 07:37:18,552 [INFO] 训练 4600, 回合 0.9834409000005452, 存储大小 18400, 损失 1.1622211786743719e-05\n",
      "2019-01-01 07:37:51,820 [INFO] 训练 4700, 回合 0.983080900000557, 存储大小 18800, 损失 1.2470849469536915e-05\n",
      "2019-01-01 07:38:00,945 [INFO] 回合 34, 步数 687, 奖励 140.0, 总步数 18949\n",
      "2019-01-01 07:38:24,837 [INFO] 训练 4800, 回合 0.9827209000005689, 存储大小 19200, 损失 1.963220893230755e-05\n",
      "2019-01-01 07:38:29,541 [INFO] 回合 35, 步数 342, 奖励 20.0, 总步数 19292\n",
      "2019-01-01 07:38:57,922 [INFO] 训练 4900, 回合 0.9823609000005807, 存储大小 19600, 损失 1.1711121260304935e-05\n",
      "2019-01-01 07:38:59,280 [INFO] 回合 36, 步数 363, 奖励 40.0, 总步数 19656\n",
      "2019-01-01 07:39:31,439 [INFO] 训练 5000, 回合 0.9820009000005926, 存储大小 20000, 损失 1.6232563211815432e-05\n",
      "2019-01-01 07:39:42,338 [INFO] 回合 37, 步数 514, 奖励 80.0, 总步数 20171\n",
      "2019-01-01 07:40:04,920 [INFO] 训练 5100, 回合 0.9816409000006044, 存储大小 20400, 损失 1.1776903193094768e-05\n",
      "2019-01-01 07:40:31,381 [INFO] 回合 38, 步数 585, 奖励 100.0, 总步数 20757\n",
      "2019-01-01 07:40:38,495 [INFO] 训练 5200, 回合 0.9812809000006163, 存储大小 20800, 损失 1.606634396011941e-05\n",
      "2019-01-01 07:41:08,865 [INFO] 回合 39, 步数 441, 奖励 60.0, 总步数 21199\n",
      "2019-01-01 07:41:12,649 [INFO] 训练 5300, 回合 0.9809209000006281, 存储大小 21200, 损失 0.0017851587617769837\n",
      "2019-01-01 07:41:45,109 [INFO] 回合 40, 步数 422, 奖励 40.0, 总步数 21622\n",
      "2019-01-01 07:41:46,789 [INFO] 训练 5400, 回合 0.98056090000064, 存储大小 21600, 损失 1.1773476217058487e-05\n",
      "2019-01-01 07:42:20,801 [INFO] 训练 5500, 回合 0.9802009000006519, 存储大小 22000, 损失 1.590411011420656e-05\n",
      "2019-01-01 07:42:31,377 [INFO] 回合 41, 步数 544, 奖励 120.0, 总步数 22167\n",
      "2019-01-01 07:42:54,734 [INFO] 训练 5600, 回合 0.9798409000006637, 存储大小 22400, 损失 1.5735073247924447e-05\n",
      "2019-01-01 07:43:29,039 [INFO] 训练 5700, 回合 0.9794809000006756, 存储大小 22800, 损失 0.00168048741761595\n",
      "2019-01-01 07:43:49,493 [INFO] 回合 42, 步数 918, 奖励 220.0, 总步数 23086\n",
      "2019-01-01 07:44:03,382 [INFO] 训练 5800, 回合 0.9791209000006874, 存储大小 23200, 损失 7.878894393797964e-06\n",
      "2019-01-01 07:44:21,858 [INFO] 回合 43, 步数 371, 奖励 40.0, 总步数 23458\n",
      "2019-01-01 07:44:38,464 [INFO] 训练 5900, 回合 0.9787609000006993, 存储大小 23600, 损失 1.3419463357422501e-05\n",
      "2019-01-01 07:45:13,577 [INFO] 训练 6000, 回合 0.9784009000007111, 存储大小 24000, 损失 1.7540520275360905e-05\n",
      "2019-01-01 07:45:14,359 [INFO] 回合 44, 步数 595, 奖励 140.0, 总步数 24054\n",
      "2019-01-01 07:45:49,201 [INFO] 训练 6100, 回合 0.978040900000723, 存储大小 24400, 损失 1.3728520571021363e-05\n",
      "2019-01-01 07:46:05,983 [INFO] 回合 45, 步数 607, 奖励 120.0, 总步数 24662\n",
      "2019-01-01 07:46:19,530 [INFO] 训练 6200, 回合 0.9776809000007348, 存储大小 24800, 损失 1.8212918803328648e-05\n",
      "2019-01-01 07:46:42,182 [INFO] [测试] 回合 0: 步骤 552, 奖励 60.0, 步数 552\n",
      "2019-01-01 07:46:59,152 [INFO] [测试] 回合 1: 步骤 827, 奖励 180.0, 步数 1379\n",
      "2019-01-01 07:47:08,060 [INFO] [测试] 回合 2: 步骤 437, 奖励 80.0, 步数 1816\n",
      "2019-01-01 07:47:22,517 [INFO] [测试] 回合 3: 步骤 701, 奖励 140.0, 步数 2517\n",
      "2019-01-01 07:47:31,127 [INFO] [测试] 回合 4: 步骤 426, 奖励 40.0, 步数 2943\n",
      "2019-01-01 07:47:40,618 [INFO] [测试] 回合 5: 步骤 463, 奖励 60.0, 步数 3406\n",
      "2019-01-01 07:48:06,395 [INFO] [测试] 回合 6: 步骤 1227, 奖励 240.0, 步数 4633\n",
      "2019-01-01 07:48:28,067 [INFO] [测试] 回合 7: 步骤 1047, 奖励 200.0, 步数 5680\n",
      "2019-01-01 07:48:35,236 [INFO] [测试] 回合 8: 步骤 363, 奖励 20.0, 步数 6043\n",
      "2019-01-01 07:48:44,166 [INFO] [测试] 回合 9: 步骤 450, 奖励 20.0, 步数 6493\n",
      "2019-01-01 07:49:03,858 [INFO] [测试] 回合 10: 步骤 958, 奖励 200.0, 步数 7451\n",
      "2019-01-01 07:49:27,731 [INFO] [测试] 回合 11: 步骤 1166, 奖励 300.0, 步数 8617\n",
      "2019-01-01 07:49:37,178 [INFO] [测试] 回合 12: 步骤 474, 奖励 80.0, 步数 9091\n",
      "2019-01-01 07:49:45,623 [INFO] [测试] 回合 13: 步骤 428, 奖励 60.0, 步数 9519\n",
      "2019-01-01 07:49:58,899 [INFO] [测试] 回合 14: 步骤 657, 奖励 200.0, 步数 10176\n",
      "2019-01-01 07:50:15,304 [INFO] [测试] 回合 15: 步骤 806, 奖励 220.0, 步数 10982\n",
      "2019-01-01 07:50:38,125 [INFO] [测试] 回合 16: 步骤 1115, 奖励 260.0, 步数 12097\n",
      "2019-01-01 07:50:48,932 [INFO] [测试] 回合 17: 步骤 539, 奖励 120.0, 步数 12636\n",
      "2019-01-01 07:50:57,059 [INFO] [测试] 回合 18: 步骤 411, 奖励 20.0, 步数 13047\n",
      "2019-01-01 07:51:06,424 [INFO] [测试] 回合 19: 步骤 467, 奖励 20.0, 步数 13514\n",
      "2019-01-01 07:51:17,019 [INFO] [测试] 回合 20: 步骤 523, 奖励 60.0, 步数 14037\n",
      "2019-01-01 07:51:25,720 [INFO] [测试] 回合 21: 步骤 437, 奖励 60.0, 步数 14474\n",
      "2019-01-01 07:51:40,744 [INFO] [测试] 回合 22: 步骤 734, 奖励 200.0, 步数 15208\n",
      "2019-01-01 07:51:56,324 [INFO] [测试] 回合 23: 步骤 761, 奖励 140.0, 步数 15969\n",
      "2019-01-01 07:52:07,948 [INFO] [测试] 回合 24: 步骤 567, 奖励 120.0, 步数 16536\n",
      "2019-01-01 07:52:17,842 [INFO] [测试] 回合 25: 步骤 489, 奖励 20.0, 步数 17025\n",
      "2019-01-01 07:52:26,481 [INFO] [测试] 回合 26: 步骤 432, 奖励 40.0, 步数 17457\n",
      "2019-01-01 07:52:44,707 [INFO] [测试] 回合 27: 步骤 897, 奖励 160.0, 步数 18354\n",
      "2019-01-01 07:52:58,152 [INFO] [测试] 回合 28: 步骤 654, 奖励 140.0, 步数 19008\n",
      "2019-01-01 07:53:12,136 [INFO] [测试] 回合 29: 步骤 693, 奖励 160.0, 步数 19701\n",
      "2019-01-01 07:53:22,204 [INFO] [测试] 回合 30: 步骤 499, 奖励 20.0, 步数 20200\n",
      "2019-01-01 07:53:31,217 [INFO] [测试] 回合 31: 步骤 447, 奖励 60.0, 步数 20647\n",
      "2019-01-01 07:53:45,025 [INFO] [测试] 回合 32: 步骤 679, 奖励 60.0, 步数 21326\n",
      "2019-01-01 07:53:56,244 [INFO] [测试] 回合 33: 步骤 550, 奖励 60.0, 步数 21876\n",
      "2019-01-01 07:54:09,018 [INFO] [测试] 回合 34: 步骤 625, 奖励 180.0, 步数 22501\n",
      "2019-01-01 07:54:31,921 [INFO] [测试] 回合 35: 步骤 1113, 奖励 280.0, 步数 23614\n",
      "2019-01-01 07:54:43,826 [INFO] [测试] 回合 36: 步骤 584, 奖励 100.0, 步数 24198\n",
      "2019-01-01 07:55:02,200 [INFO] [测试] 回合 37: 步骤 891, 奖励 180.0, 步数 25089\n",
      "2019-01-01 07:55:13,640 [INFO] [测试] 回合 38: 步骤 567, 奖励 20.0, 步数 25656\n",
      "2019-01-01 07:55:23,583 [INFO] [测试] 回合 39: 步骤 492, 奖励 80.0, 步数 26148\n",
      "2019-01-01 07:55:38,187 [INFO] [测试] 回合 40: 步骤 706, 奖励 80.0, 步数 26854\n",
      "2019-01-01 07:56:00,310 [INFO] [测试] 回合 41: 步骤 1073, 奖励 300.0, 步数 27927\n",
      "2019-01-01 07:56:13,385 [INFO] [测试] 回合 42: 步骤 649, 奖励 120.0, 步数 28576\n",
      "2019-01-01 07:56:23,388 [INFO] [测试] 回合 43: 步骤 496, 奖励 60.0, 步数 29072\n",
      "2019-01-01 07:56:32,563 [INFO] [测试] 回合 44: 步骤 460, 奖励 20.0, 步数 29532\n",
      "2019-01-01 07:56:44,790 [INFO] [测试] 回合 45: 步骤 596, 奖励 60.0, 步数 30128\n",
      "2019-01-01 07:56:59,206 [INFO] [测试] 回合 46: 步骤 717, 奖励 180.0, 步数 30845\n",
      "2019-01-01 07:57:23,081 [INFO] [测试] 回合 47: 步骤 1165, 奖励 300.0, 步数 32010\n",
      "2019-01-01 07:57:33,896 [INFO] [测试] 回合 48: 步骤 539, 奖励 100.0, 步数 32549\n",
      "2019-01-01 07:57:43,985 [INFO] [测试] 回合 49: 步骤 507, 奖励 160.0, 步数 33056\n",
      "2019-01-01 07:57:43,985 [INFO] [测试小结] 步数: 平均 = 661.12, 最小 = 363, 最大 = 1227.\n",
      "2019-01-01 07:57:43,985 [INFO] [测试小结] 奖励: 平均 = 120.8, 最小 = 20.0, 最大 = 300.0\n",
      "2019-01-01 07:57:43,997 [INFO] 创建文件夹 ./output/SeaquestDeterministic-v4-20190124-071404\n",
      "2019-01-01 07:57:44,218 [INFO] 网络权重已保存 ./output/SeaquestDeterministic-v4-20190124-071404/model.h5\n",
      "2019-01-01 07:57:44,435 [INFO] 网络权重已保存 ./output/SeaquestDeterministic-v4-20190124-071404/model.6238.h5\n",
      "2019-01-01 07:57:44,456 [INFO] 回合 46, 步数 339, 奖励 120.0, 总步数 25002\n",
      "2019-01-01 07:58:02,608 [INFO] 训练 6300, 回合 0.9773209000007467, 存储大小 25200, 损失 1.8554588677943684e-05\n",
      "2019-01-01 07:58:18,518 [INFO] 回合 47, 步数 464, 奖励 40.0, 总步数 25467\n",
      "2019-01-01 07:58:31,959 [INFO] 训练 6400, 回合 0.9769609000007585, 存储大小 25600, 损失 1.1091399755969178e-05\n",
      "2019-01-01 07:59:02,433 [INFO] 训练 6500, 回合 0.9766009000007704, 存储大小 26000, 损失 9.25009953789413e-06\n",
      "2019-01-01 07:59:02,478 [INFO] 回合 48, 步数 584, 奖励 100.0, 总步数 26052\n",
      "2019-01-01 07:59:30,006 [INFO] 回合 49, 步数 371, 奖励 20.0, 总步数 26424\n",
      "2019-01-01 07:59:32,017 [INFO] 训练 6600, 回合 0.9762409000007822, 存储大小 26400, 损失 1.942834205692634e-05\n",
      "2019-01-01 07:59:58,210 [INFO] 回合 50, 步数 372, 奖励 0.0, 总步数 26797\n",
      "2019-01-01 08:00:02,422 [INFO] 训练 6700, 回合 0.9758809000007941, 存储大小 26800, 损失 9.05703054741025e-05\n",
      "2019-01-01 08:00:33,234 [INFO] 训练 6800, 回合 0.9755209000008059, 存储大小 27200, 损失 2.673777453310322e-05\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2019-01-01 08:00:38,195 [INFO] 回合 51, 步数 507, 奖励 60.0, 总步数 27305\n",
      "2019-01-01 08:01:10,460 [INFO] 训练 6900, 回合 0.9751609000008178, 存储大小 27600, 损失 0.001699072658084333\n",
      "2019-01-01 08:01:26,647 [INFO] 回合 52, 步数 523, 奖励 80.0, 总步数 27829\n",
      "2019-01-01 08:01:47,928 [INFO] 训练 7000, 回合 0.9748009000008296, 存储大小 28000, 损失 0.0017145485617220402\n",
      "2019-01-01 08:02:05,888 [INFO] 回合 53, 步数 417, 奖励 40.0, 总步数 28247\n",
      "2019-01-01 08:02:25,051 [INFO] 训练 7100, 回合 0.9744409000008415, 存储大小 28400, 损失 0.00018454679229762405\n",
      "2019-01-01 08:02:52,496 [INFO] 回合 54, 步数 496, 奖励 40.0, 总步数 28744\n",
      "2019-01-01 08:03:02,779 [INFO] 训练 7200, 回合 0.9740809000008533, 存储大小 28800, 损失 1.7928719898918644e-05\n",
      "2019-01-01 08:03:32,295 [INFO] 回合 55, 步数 429, 奖励 40.0, 总步数 29174\n",
      "2019-01-01 08:03:40,227 [INFO] 训练 7300, 回合 0.9737209000008652, 存储大小 29200, 损失 9.458746717427857e-06\n",
      "2019-01-01 08:04:13,942 [INFO] 回合 56, 步数 435, 奖励 20.0, 总步数 29610\n",
      "2019-01-01 08:04:18,484 [INFO] 训练 7400, 回合 0.973360900000877, 存储大小 29600, 损失 0.0017526044975966215\n",
      "2019-01-01 08:04:55,953 [INFO] 训练 7500, 回合 0.9730009000008889, 存储大小 30000, 损失 1.3152459359844215e-05\n",
      "2019-01-01 08:05:06,361 [INFO] 回合 57, 步数 557, 奖励 100.0, 总步数 30168\n",
      "2019-01-01 08:05:34,156 [INFO] 训练 7600, 回合 0.9726409000009008, 存储大小 30400, 损失 2.1869596821488813e-05\n",
      "2019-01-01 08:05:56,400 [INFO] 回合 58, 步数 526, 奖励 100.0, 总步数 30695\n",
      "2019-01-01 08:06:12,173 [INFO] 训练 7700, 回合 0.9722809000009126, 存储大小 30800, 损失 0.0017275973223149776\n",
      "2019-01-01 08:06:36,401 [INFO] 回合 59, 步数 412, 奖励 20.0, 总步数 31108\n",
      "2019-01-01 08:06:51,472 [INFO] 训练 7800, 回合 0.9719209000009245, 存储大小 31200, 损失 1.5522944522672333e-05\n",
      "2019-01-01 08:07:16,805 [INFO] 回合 60, 步数 416, 奖励 20.0, 总步数 31525\n",
      "2019-01-01 08:07:30,051 [INFO] 训练 7900, 回合 0.9715609000009363, 存储大小 31600, 损失 1.26398717839038e-05\n",
      "2019-01-01 08:07:56,296 [INFO] 回合 61, 步数 410, 奖励 20.0, 总步数 31936\n",
      "2019-01-01 08:08:09,230 [INFO] 训练 8000, 回合 0.9712009000009482, 存储大小 32000, 损失 1.128458279708866e-05\n",
      "2019-01-01 08:08:09,313 [INFO] 目标网络已更新\n",
      "2019-01-01 08:08:48,087 [INFO] 回合 62, 步数 516, 奖励 60.0, 总步数 32453\n",
      "2019-01-01 08:08:49,296 [INFO] 训练 8100, 回合 0.97084090000096, 存储大小 32400, 损失 9.962055628420785e-06\n",
      "2019-01-01 08:09:22,134 [INFO] 回合 63, 步数 346, 奖励 0.0, 总步数 32800\n",
      "2019-01-01 08:09:28,594 [INFO] 训练 8200, 回合 0.9704809000009719, 存储大小 32800, 损失 1.1966068086621817e-05\n",
      "2019-01-01 08:10:06,767 [INFO] 回合 64, 步数 448, 奖励 40.0, 总步数 33249\n",
      "2019-01-01 08:10:08,514 [INFO] 训练 8300, 回合 0.9701209000009837, 存储大小 33200, 损失 0.001678770175203681\n",
      "2019-01-01 08:10:49,527 [INFO] 训练 8400, 回合 0.9697609000009956, 存储大小 33600, 损失 7.939264833112247e-06\n",
      "2019-01-01 08:11:12,284 [INFO] 回合 65, 步数 630, 奖励 120.0, 总步数 33880\n",
      "2019-01-01 08:11:31,715 [INFO] 训练 8500, 回合 0.9694009000010074, 存储大小 34000, 损失 1.1626323612290435e-05\n",
      "2019-01-01 08:12:09,685 [INFO] 回合 66, 步数 560, 奖励 40.0, 总步数 34441\n",
      "2019-01-01 08:12:12,461 [INFO] 训练 8600, 回合 0.9690409000010193, 存储大小 34400, 损失 1.8510800146032125e-05\n",
      "2019-01-01 08:12:52,804 [INFO] 训练 8700, 回合 0.9686809000010311, 存储大小 34800, 损失 6.550941634486662e-06\n",
      "2019-01-01 08:13:01,282 [INFO] 回合 67, 步数 511, 奖励 80.0, 总步数 34953\n",
      "2019-01-01 08:13:33,532 [INFO] 训练 8800, 回合 0.968320900001043, 存储大小 35200, 损失 1.1763575457734987e-05\n",
      "2019-01-01 08:14:15,136 [INFO] 训练 8900, 回合 0.9679609000010548, 存储大小 35600, 损失 9.888610293273814e-06\n",
      "2019-01-01 08:14:22,463 [INFO] 回合 68, 步数 789, 奖励 120.0, 总步数 35743\n",
      "2019-01-01 08:14:57,433 [INFO] 训练 9000, 回合 0.9676009000010667, 存储大小 36000, 损失 1.5431554857059382e-05\n",
      "2019-01-01 08:15:24,152 [INFO] 回合 69, 步数 584, 奖励 80.0, 总步数 36328\n",
      "2019-01-01 08:15:39,669 [INFO] 训练 9100, 回合 0.9672409000010785, 存储大小 36400, 损失 1.0009638572228141e-05\n",
      "2019-01-01 08:16:21,673 [INFO] 训练 9200, 回合 0.9668809000010904, 存储大小 36800, 损失 1.3339817087398842e-05\n",
      "2019-01-01 08:16:22,163 [INFO] 回合 70, 步数 547, 奖励 120.0, 总步数 36876\n",
      "2019-01-01 08:17:04,003 [INFO] 训练 9300, 回合 0.9665209000011022, 存储大小 37200, 损失 1.2864882592111826e-05\n",
      "2019-01-01 08:17:10,339 [INFO] 回合 71, 步数 451, 奖励 40.0, 总步数 37328\n",
      "2019-01-01 08:17:47,144 [INFO] 训练 9400, 回合 0.9661609000011141, 存储大小 37600, 损失 8.074401193880476e-06\n",
      "2019-01-01 08:18:03,906 [INFO] 回合 72, 步数 503, 奖励 60.0, 总步数 37832\n",
      "2019-01-01 08:18:30,046 [INFO] 训练 9500, 回合 0.965800900001126, 存储大小 38000, 损失 1.583484117873013e-05\n",
      "2019-01-01 08:18:52,046 [INFO] 回合 73, 步数 447, 奖励 40.0, 总步数 38280\n",
      "2019-01-01 08:19:12,778 [INFO] 训练 9600, 回合 0.9654409000011378, 存储大小 38400, 损失 0.00010793627006933093\n",
      "2019-01-01 08:19:45,698 [INFO] 回合 74, 步数 494, 奖励 80.0, 总步数 38775\n",
      "2019-01-01 08:19:56,757 [INFO] 训练 9700, 回合 0.9650809000011497, 存储大小 38800, 损失 9.215061254508328e-06\n",
      "2019-01-01 08:20:22,817 [INFO] 回合 75, 步数 340, 奖励 0.0, 总步数 39116\n",
      "2019-01-01 08:20:39,683 [INFO] 训练 9800, 回合 0.9647209000011615, 存储大小 39200, 损失 0.0017082589911296964\n",
      "2019-01-01 08:21:21,968 [INFO] 回合 76, 步数 542, 奖励 140.0, 总步数 39659\n",
      "2019-01-01 08:21:24,035 [INFO] 训练 9900, 回合 0.9643609000011734, 存储大小 39600, 损失 5.5525283642055e-06\n",
      "2019-01-01 08:22:08,353 [INFO] 训练 10000, 回合 0.9640009000011852, 存储大小 40000, 损失 8.51477489050012e-06\n",
      "2019-01-01 08:22:08,363 [INFO] 回合 77, 步数 418, 奖励 60.0, 总步数 40078\n",
      "2019-01-01 08:22:53,486 [INFO] 训练 10100, 回合 0.9636409000011971, 存储大小 40400, 损失 1.1277357771177776e-05\n",
      "2019-01-01 08:23:31,860 [INFO] 回合 78, 步数 754, 奖励 160.0, 总步数 40833\n",
      "2019-01-01 08:23:37,065 [INFO] 训练 10200, 回合 0.9632809000012089, 存储大小 40800, 损失 1.446308488084469e-05\n",
      "2019-01-01 08:24:21,002 [INFO] 回合 79, 步数 439, 奖励 60.0, 总步数 41273\n",
      "2019-01-01 08:24:21,832 [INFO] 训练 10300, 回合 0.9629209000012208, 存储大小 41200, 损失 0.002001728629693389\n",
      "2019-01-01 08:25:07,964 [INFO] 训练 10400, 回合 0.9625609000012326, 存储大小 41600, 损失 1.886449899757281e-05\n",
      "2019-01-01 08:25:10,028 [INFO] 回合 80, 步数 425, 奖励 60.0, 总步数 41699\n",
      "2019-01-01 08:25:54,280 [INFO] 训练 10500, 回合 0.9622009000012445, 存储大小 42000, 损失 7.647302481927909e-06\n",
      "2019-01-01 08:26:04,588 [INFO] 回合 81, 步数 476, 奖励 20.0, 总步数 42176\n",
      "2019-01-01 08:26:39,756 [INFO] 训练 10600, 回合 0.9618409000012563, 存储大小 42400, 损失 0.00021425964951049536\n",
      "2019-01-01 08:27:25,838 [INFO] 训练 10700, 回合 0.9614809000012682, 存储大小 42800, 损失 9.260434126190376e-06\n",
      "2019-01-01 08:27:26,393 [INFO] 回合 82, 步数 712, 奖励 120.0, 总步数 42889\n",
      "2019-01-01 08:28:12,401 [INFO] 训练 10800, 回合 0.96112090000128, 存储大小 43200, 损失 9.225485882780049e-06\n",
      "2019-01-01 08:28:58,881 [INFO] 训练 10900, 回合 0.9607609000012919, 存储大小 43600, 损失 1.6234411305049434e-05\n",
      "2019-01-01 08:29:04,418 [INFO] 回合 83, 步数 842, 奖励 220.0, 总步数 43732\n",
      "2019-01-01 08:29:46,038 [INFO] 训练 11000, 回合 0.9604009000013037, 存储大小 44000, 损失 1.020118816086324e-05\n",
      "2019-01-01 08:30:03,355 [INFO] 回合 84, 步数 494, 奖励 80.0, 总步数 44227\n",
      "2019-01-01 08:30:34,610 [INFO] 训练 11100, 回合 0.9600409000013156, 存储大小 44400, 损失 1.1858046491397545e-05\n",
      "2019-01-01 08:30:41,587 [INFO] 回合 85, 步数 317, 奖励 0.0, 总步数 44545\n",
      "2019-01-01 08:31:22,780 [INFO] 训练 11200, 回合 0.9596809000013274, 存储大小 44800, 损失 0.00013506057439371943\n",
      "2019-01-01 08:31:51,416 [INFO] 回合 86, 步数 587, 奖励 120.0, 总步数 45133\n",
      "2019-01-01 08:32:10,536 [INFO] 训练 11300, 回合 0.9593209000013393, 存储大小 45200, 损失 1.232778413395863e-05\n",
      "2019-01-01 08:32:47,265 [INFO] 回合 87, 步数 455, 奖励 60.0, 总步数 45589\n",
      "2019-01-01 08:32:59,254 [INFO] 训练 11400, 回合 0.9589609000013511, 存储大小 45600, 损失 1.0604115232126787e-05\n",
      "2019-01-01 08:33:48,280 [INFO] 训练 11500, 回合 0.958600900001363, 存储大小 46000, 损失 1.1327692845952697e-05\n",
      "2019-01-01 08:33:49,288 [INFO] 回合 88, 步数 510, 奖励 100.0, 总步数 46100\n",
      "2019-01-01 08:34:38,831 [INFO] 训练 11600, 回合 0.9582409000013749, 存储大小 46400, 损失 0.0001242491416633129\n",
      "2019-01-01 08:34:43,097 [INFO] 回合 89, 步数 425, 奖励 40.0, 总步数 46526\n",
      "2019-01-01 08:35:27,136 [INFO] 训练 11700, 回合 0.9578809000013867, 存储大小 46800, 损失 1.139254163717851e-05\n",
      "2019-01-01 08:35:46,354 [INFO] 回合 90, 步数 508, 奖励 100.0, 总步数 47035\n",
      "2019-01-01 08:36:19,243 [INFO] 训练 11800, 回合 0.9575209000013986, 存储大小 47200, 损失 0.00170795782469213\n",
      "2019-01-01 08:36:45,177 [INFO] 回合 91, 步数 469, 奖励 60.0, 总步数 47505\n",
      "2019-01-01 08:37:09,104 [INFO] 训练 11900, 回合 0.9571609000014104, 存储大小 47600, 损失 1.2093480108887888e-05\n",
      "2019-01-01 08:38:00,043 [INFO] 训练 12000, 回合 0.9568009000014223, 存储大小 48000, 损失 1.099120345315896e-05\n",
      "2019-01-01 08:38:00,140 [INFO] 目标网络已更新\n",
      "2019-01-01 08:38:47,496 [INFO] 回合 92, 步数 964, 奖励 240.0, 总步数 48470\n",
      "2019-01-01 08:38:50,565 [INFO] 训练 12100, 回合 0.9564409000014341, 存储大小 48400, 损失 0.000125945545732975\n",
      "2019-01-01 08:39:41,602 [INFO] 训练 12200, 回合 0.956080900001446, 存储大小 48800, 损失 0.0018107807263731956\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2019-01-01 08:40:03,223 [INFO] 回合 93, 步数 594, 奖励 120.0, 总步数 49065\n",
      "2019-01-01 08:40:33,839 [INFO] 训练 12300, 回合 0.9557209000014578, 存储大小 49200, 损失 1.6057316315709613e-05\n",
      "2019-01-01 08:41:25,515 [INFO] 训练 12400, 回合 0.9553609000014697, 存储大小 49600, 损失 1.7378642951371148e-05\n",
      "2019-01-01 08:41:32,166 [INFO] 回合 94, 步数 679, 奖励 100.0, 总步数 49745\n",
      "2019-01-01 08:42:27,828 [INFO] [测试] 回合 0: 步骤 522, 奖励 140.0, 步数 522\n",
      "2019-01-01 08:43:15,044 [INFO] [测试] 回合 1: 步骤 1066, 奖励 320.0, 步数 1588\n",
      "2019-01-01 08:43:46,848 [INFO] [测试] 回合 2: 步骤 707, 奖励 180.0, 步数 2295\n",
      "2019-01-01 08:44:12,806 [INFO] [测试] 回合 3: 步骤 589, 奖励 140.0, 步数 2884\n",
      "2019-01-01 08:44:32,024 [INFO] [测试] 回合 4: 步骤 539, 奖励 120.0, 步数 3423\n",
      "2019-01-01 08:45:00,330 [INFO] [测试] 回合 5: 步骤 805, 奖励 200.0, 步数 4228\n",
      "2019-01-01 08:45:33,847 [INFO] [测试] 回合 6: 步骤 749, 奖励 180.0, 步数 4977\n",
      "2019-01-01 08:46:04,012 [INFO] [测试] 回合 7: 步骤 676, 奖励 180.0, 步数 5653\n",
      "2019-01-01 08:47:00,370 [INFO] [测试] 回合 8: 步骤 1259, 奖励 300.0, 步数 6912\n",
      "2019-01-01 08:47:54,540 [INFO] [测试] 回合 9: 步骤 1207, 奖励 340.0, 步数 8119\n",
      "2019-01-01 08:48:44,077 [INFO] [测试] 回合 10: 步骤 1115, 奖励 340.0, 步数 9234\n",
      "2019-01-01 08:49:36,298 [INFO] [测试] 回合 11: 步骤 1143, 奖励 300.0, 步数 10377\n",
      "2019-01-01 08:50:01,421 [INFO] [测试] 回合 12: 步骤 570, 奖励 120.0, 步数 10947\n",
      "2019-01-01 08:50:30,929 [INFO] [测试] 回合 13: 步骤 640, 奖励 140.0, 步数 11587\n",
      "2019-01-01 08:51:23,192 [INFO] [测试] 回合 14: 步骤 1120, 奖励 300.0, 步数 12707\n",
      "2019-01-01 08:52:08,690 [INFO] [测试] 回合 15: 步骤 981, 奖励 240.0, 步数 13688\n",
      "2019-01-01 08:52:53,048 [INFO] [测试] 回合 16: 步骤 1003, 奖励 220.0, 步数 14691\n",
      "2019-01-01 08:53:38,112 [INFO] [测试] 回合 17: 步骤 1027, 奖励 200.0, 步数 15718\n",
      "2019-01-01 08:54:46,990 [INFO] [测试] 回合 18: 步骤 1532, 奖励 360.0, 步数 17250\n",
      "2019-01-01 08:55:30,502 [INFO] [测试] 回合 19: 步骤 972, 奖励 280.0, 步数 18222\n",
      "2019-01-01 08:56:02,638 [INFO] [测试] 回合 20: 步骤 711, 奖励 140.0, 步数 18933\n",
      "2019-01-01 08:57:08,679 [INFO] [测试] 回合 21: 步骤 1426, 奖励 400.0, 步数 20359\n",
      "2019-01-01 08:57:58,699 [INFO] [测试] 回合 22: 步骤 1139, 奖励 300.0, 步数 21498\n",
      "2019-01-01 08:58:53,991 [INFO] [测试] 回合 23: 步骤 1211, 奖励 320.0, 步数 22709\n",
      "2019-01-01 08:59:35,296 [INFO] [测试] 回合 24: 步骤 935, 奖励 180.0, 步数 23644\n",
      "2019-01-01 09:00:25,933 [INFO] [测试] 回合 25: 步骤 1120, 奖励 260.0, 步数 24764\n",
      "2019-01-01 09:00:48,590 [INFO] [测试] 回合 26: 步骤 508, 奖励 140.0, 步数 25272\n",
      "2019-01-01 09:01:22,133 [INFO] [测试] 回合 27: 步骤 764, 奖励 180.0, 步数 26036\n",
      "2019-01-01 09:02:04,059 [INFO] [测试] 回合 28: 步骤 928, 奖励 260.0, 步数 26964\n",
      "2019-01-01 09:02:49,334 [INFO] [测试] 回合 29: 步骤 1004, 奖励 240.0, 步数 27968\n",
      "2019-01-01 09:03:15,660 [INFO] [测试] 回合 30: 步骤 582, 奖励 100.0, 步数 28550\n",
      "2019-01-01 09:04:01,119 [INFO] [测试] 回合 31: 步骤 978, 奖励 200.0, 步数 29528\n",
      "2019-01-01 09:04:44,074 [INFO] [测试] 回合 32: 步骤 971, 奖励 280.0, 步数 30499\n",
      "2019-01-01 09:05:14,129 [INFO] [测试] 回合 33: 步骤 652, 奖励 80.0, 步数 31151\n",
      "2019-01-01 09:05:54,311 [INFO] [测试] 回合 34: 步骤 866, 奖励 100.0, 步数 32017\n",
      "2019-01-01 09:06:45,431 [INFO] [测试] 回合 35: 步骤 1121, 奖励 260.0, 步数 33138\n",
      "2019-01-01 09:07:12,161 [INFO] [测试] 回合 36: 步骤 585, 奖励 100.0, 步数 33723\n",
      "2019-01-01 09:07:43,978 [INFO] [测试] 回合 37: 步骤 699, 奖励 200.0, 步数 34422\n",
      "2019-01-01 09:08:57,722 [INFO] [测试] 回合 38: 步骤 1616, 奖励 520.0, 步数 36038\n",
      "2019-01-01 09:09:47,475 [INFO] [测试] 回合 39: 步骤 1110, 奖励 240.0, 步数 37148\n",
      "2019-01-01 09:10:41,847 [INFO] [测试] 回合 40: 步骤 1197, 奖励 340.0, 步数 38345\n",
      "2019-01-01 09:11:39,851 [INFO] [测试] 回合 41: 步骤 1232, 奖励 320.0, 步数 39577\n",
      "2019-01-01 09:12:05,064 [INFO] [测试] 回合 42: 步骤 539, 奖励 120.0, 步数 40116\n",
      "2019-01-01 09:12:44,920 [INFO] [测试] 回合 43: 步骤 868, 奖励 160.0, 步数 40984\n",
      "2019-01-01 09:13:17,394 [INFO] [测试] 回合 44: 步骤 700, 奖励 160.0, 步数 41684\n",
      "2019-01-01 09:13:56,641 [INFO] [测试] 回合 45: 步骤 871, 奖励 220.0, 步数 42555\n",
      "2019-01-01 09:14:26,038 [INFO] [测试] 回合 46: 步骤 674, 奖励 220.0, 步数 43229\n",
      "2019-01-01 09:15:14,912 [INFO] [测试] 回合 47: 步骤 1074, 奖励 220.0, 步数 44303\n",
      "2019-01-01 09:16:01,826 [INFO] [测试] 回合 48: 步骤 1015, 奖励 160.0, 步数 45318\n",
      "2019-01-01 09:16:54,234 [INFO] [测试] 回合 49: 步骤 1179, 奖励 280.0, 步数 46497\n",
      "2019-01-01 09:16:54,234 [INFO] [测试小结] 步数: 平均 = 929.94, 最小 = 508, 最大 = 1616.\n",
      "2019-01-01 09:16:54,234 [INFO] [测试小结] 奖励: 平均 = 226.0, 最小 = 80.0, 最大 = 520.0\n",
      "2019-01-01 09:16:54,402 [INFO] 网络权重已保存 ./output/SeaquestDeterministic-v4-20190124-071404/model.h5\n",
      "2019-01-01 09:16:54,485 [INFO] 网络权重已保存 ./output/SeaquestDeterministic-v4-20190124-071404/model.12476.h5\n",
      "2019-01-01 09:16:54,510 [INFO] 回合 95, 步数 256, 奖励 100.0, 总步数 50002\n",
      "2019-01-01 09:17:04,856 [INFO] 训练 12500, 回合 0.9550009000014815, 存储大小 50000, 损失 2.1839368855580688e-05\n",
      "2019-01-01 09:17:34,968 [INFO] 回合 96, 步数 385, 奖励 20.0, 总步数 50388\n",
      "2019-01-01 09:17:48,899 [INFO] 训练 12600, 回合 0.9546409000014934, 存储大小 50000, 损失 0.0017114962683990598\n",
      "2019-01-01 09:18:39,590 [INFO] 训练 12700, 回合 0.9542809000015052, 存储大小 50000, 损失 0.0017352700233459473\n",
      "2019-01-01 09:18:59,714 [INFO] 回合 97, 步数 666, 奖励 140.0, 总步数 51055\n",
      "2019-01-01 09:19:31,047 [INFO] 训练 12800, 回合 0.9539209000015171, 存储大小 50000, 损失 1.1952938621107023e-05\n",
      "2019-01-01 09:20:23,342 [INFO] 训练 12900, 回合 0.9535609000015289, 存储大小 50000, 损失 0.0016864617355167866\n",
      "2019-01-01 09:20:29,888 [INFO] 回合 98, 步数 693, 奖励 80.0, 总步数 51749\n",
      "2019-01-01 09:21:16,325 [INFO] 训练 13000, 回合 0.9532009000015408, 存储大小 50000, 损失 0.00014228072541300207\n",
      "2019-01-01 09:22:09,747 [INFO] 训练 13100, 回合 0.9528409000015526, 存储大小 50000, 损失 0.0017377285985276103\n",
      "2019-01-01 09:23:00,495 [INFO] 回合 99, 步数 1129, 奖励 360.0, 总步数 52879\n",
      "2019-01-01 09:23:03,625 [INFO] 训练 13200, 回合 0.9524809000015645, 存储大小 50000, 损失 2.692516500246711e-05\n",
      "2019-01-01 09:23:42,372 [INFO] 回合 100, 步数 316, 奖励 0.0, 总步数 53196\n",
      "2019-01-01 09:23:56,804 [INFO] 训练 13300, 回合 0.9521209000015763, 存储大小 50000, 损失 6.209825187397655e-06\n",
      "2019-01-01 09:24:28,386 [INFO] 回合 101, 步数 337, 奖励 0.0, 总步数 53534\n",
      "2019-01-01 09:24:50,672 [INFO] 训练 13400, 回合 0.9517609000015882, 存储大小 50000, 损失 8.891760444385e-06\n",
      "2019-01-01 09:25:44,228 [INFO] 训练 13500, 回合 0.9514009000016, 存储大小 50000, 损失 1.172350766864838e-05\n",
      "2019-01-01 09:25:59,411 [INFO] 回合 102, 步数 680, 奖励 160.0, 总步数 54215\n",
      "2019-01-01 09:26:38,086 [INFO] 训练 13600, 回合 0.9510409000016119, 存储大小 50000, 损失 1.4404084140551277e-05\n",
      "2019-01-01 09:27:03,433 [INFO] 回合 103, 步数 477, 奖励 60.0, 总步数 54693\n",
      "2019-01-01 09:27:31,862 [INFO] 训练 13700, 回合 0.9506809000016238, 存储大小 50000, 损失 1.8787435692502186e-05\n",
      "2019-01-01 09:28:01,221 [INFO] 回合 104, 步数 432, 奖励 40.0, 总步数 55126\n",
      "2019-01-01 09:28:25,166 [INFO] 训练 13800, 回合 0.9503209000016356, 存储大小 50000, 损失 0.00013582923565991223\n",
      "2019-01-01 09:28:55,979 [INFO] 回合 105, 步数 408, 奖励 40.0, 总步数 55535\n",
      "2019-01-01 09:29:18,884 [INFO] 训练 13900, 回合 0.9499609000016475, 存储大小 50000, 损失 0.001796795753762126\n",
      "2019-01-01 09:29:50,376 [INFO] 回合 106, 步数 406, 奖励 60.0, 总步数 55942\n",
      "2019-01-01 09:30:12,687 [INFO] 训练 14000, 回合 0.9496009000016593, 存储大小 50000, 损失 0.0016134879551827908\n",
      "2019-01-01 09:30:42,592 [INFO] 回合 107, 步数 389, 奖励 20.0, 总步数 56332\n",
      "2019-01-01 09:31:05,601 [INFO] 训练 14100, 回合 0.9492409000016712, 存储大小 50000, 损失 0.0017116809030994773\n",
      "2019-01-01 09:31:29,081 [INFO] 回合 108, 步数 352, 奖励 0.0, 总步数 56685\n",
      "2019-01-01 09:31:58,975 [INFO] 训练 14200, 回合 0.948880900001683, 存储大小 50000, 损失 1.5275298210326582e-05\n",
      "2019-01-01 09:32:26,487 [INFO] 回合 109, 步数 428, 奖励 40.0, 总步数 57114\n",
      "2019-01-01 09:32:52,440 [INFO] 训练 14300, 回合 0.9485209000016949, 存储大小 50000, 损失 1.7999302144744433e-05\n",
      "2019-01-01 09:33:27,576 [INFO] 回合 110, 步数 462, 奖励 80.0, 总步数 57577\n",
      "2019-01-01 09:33:45,488 [INFO] 训练 14400, 回合 0.9481609000017067, 存储大小 50000, 损失 0.0017729185055941343\n",
      "2019-01-01 09:34:39,108 [INFO] 训练 14500, 回合 0.9478009000017186, 存储大小 50000, 损失 0.0018315776251256466\n",
      "2019-01-01 09:34:51,384 [INFO] 回合 111, 步数 626, 奖励 160.0, 总步数 58204\n",
      "2019-01-01 09:35:32,771 [INFO] 训练 14600, 回合 0.9474409000017304, 存储大小 50000, 损失 1.8532145986682735e-05\n",
      "2019-01-01 09:36:06,005 [INFO] 回合 112, 步数 555, 奖励 60.0, 总步数 58760\n",
      "2019-01-01 09:36:26,619 [INFO] 训练 14700, 回合 0.9470809000017423, 存储大小 50000, 损失 1.1205530427105259e-05\n",
      "2019-01-01 09:37:20,866 [INFO] 训练 14800, 回合 0.9467209000017541, 存储大小 50000, 损失 1.0530628060223535e-05\n",
      "2019-01-01 09:37:36,910 [INFO] 回合 113, 步数 670, 奖励 140.0, 总步数 59431\n",
      "2019-01-01 09:38:15,303 [INFO] 训练 14900, 回合 0.946360900001766, 存储大小 50000, 损失 6.150780791358557e-06\n",
      "2019-01-01 09:38:54,214 [INFO] 回合 114, 步数 574, 奖励 60.0, 总步数 60006\n",
      "2019-01-01 09:39:09,122 [INFO] 训练 15000, 回合 0.9460009000017778, 存储大小 50000, 损失 0.0017048550071194768\n",
      "2019-01-01 09:40:03,002 [INFO] 训练 15100, 回合 0.9456409000017897, 存储大小 50000, 损失 1.1756888852687553e-05\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2019-01-01 09:40:18,206 [INFO] 回合 115, 步数 624, 奖励 160.0, 总步数 60631\n",
      "2019-01-01 09:40:56,892 [INFO] 训练 15200, 回合 0.9452809000018015, 存储大小 50000, 损失 0.0018040966242551804\n",
      "2019-01-01 09:41:34,635 [INFO] 回合 116, 步数 563, 奖励 100.0, 总步数 61195\n",
      "2019-01-01 09:41:51,421 [INFO] 训练 15300, 回合 0.9449209000018134, 存储大小 50000, 损失 0.0017157591646537185\n",
      "2019-01-01 09:42:43,687 [INFO] 训练 15400, 回合 0.9445609000018252, 存储大小 50000, 损失 1.3229073374532163e-05\n",
      "2019-01-01 09:42:55,382 [INFO] 回合 117, 步数 609, 奖励 140.0, 总步数 61805\n",
      "2019-01-01 09:43:37,855 [INFO] 训练 15500, 回合 0.9442009000018371, 存储大小 50000, 损失 9.710726772027556e-06\n",
      "2019-01-01 09:43:50,650 [INFO] 回合 118, 步数 409, 奖励 60.0, 总步数 62215\n",
      "2019-01-01 09:44:31,759 [INFO] 训练 15600, 回合 0.943840900001849, 存储大小 50000, 损失 2.3481705284211785e-05\n",
      "2019-01-01 09:44:56,408 [INFO] 回合 119, 步数 487, 奖励 120.0, 总步数 62703\n",
      "2019-01-01 09:45:26,179 [INFO] 训练 15700, 回合 0.9434809000018608, 存储大小 50000, 损失 1.0584591109363828e-05\n",
      "2019-01-01 09:46:20,846 [INFO] 训练 15800, 回合 0.9431209000018727, 存储大小 50000, 损失 1.2191247151349671e-05\n",
      "2019-01-01 09:47:14,865 [INFO] 回合 120, 步数 1016, 奖励 200.0, 总步数 63720\n",
      "2019-01-01 09:47:15,322 [INFO] 训练 15900, 回合 0.9427609000018845, 存储大小 50000, 损失 1.3224185750004835e-05\n",
      "2019-01-01 09:48:09,864 [INFO] 训练 16000, 回合 0.9424009000018964, 存储大小 50000, 损失 1.634878572076559e-05\n",
      "2019-01-01 09:48:09,928 [INFO] 目标网络已更新\n",
      "2019-01-01 09:48:35,801 [INFO] 回合 121, 步数 593, 奖励 80.0, 总步数 64314\n",
      "2019-01-01 09:49:04,014 [INFO] 训练 16100, 回合 0.9420409000019082, 存储大小 50000, 损失 2.4883604055503383e-05\n",
      "2019-01-01 09:49:35,541 [INFO] 回合 122, 步数 442, 奖励 80.0, 总步数 64757\n",
      "2019-01-01 09:49:58,553 [INFO] 训练 16200, 回合 0.9416809000019201, 存储大小 50000, 损失 1.2987589798285626e-05\n",
      "2019-01-01 09:50:42,950 [INFO] 回合 123, 步数 498, 奖励 100.0, 总步数 65256\n",
      "2019-01-01 09:50:52,477 [INFO] 训练 16300, 回合 0.9413209000019319, 存储大小 50000, 损失 0.0017961052944883704\n",
      "2019-01-01 09:51:47,609 [INFO] 训练 16400, 回合 0.9409609000019438, 存储大小 50000, 损失 0.0035444062668830156\n",
      "2019-01-01 09:51:56,549 [INFO] 回合 124, 步数 533, 奖励 120.0, 总步数 65790\n",
      "2019-01-01 09:52:42,415 [INFO] 训练 16500, 回合 0.9406009000019556, 存储大小 50000, 损失 2.783703530440107e-05\n",
      "2019-01-01 09:53:37,176 [INFO] 训练 16600, 回合 0.9402409000019675, 存储大小 50000, 损失 7.52158439354389e-06\n",
      "2019-01-01 09:53:46,326 [INFO] 回合 125, 步数 804, 奖励 100.0, 总步数 66595\n",
      "2019-01-01 09:54:31,442 [INFO] 训练 16700, 回合 0.9398809000019793, 存储大小 50000, 损失 1.9565184629755095e-05\n",
      "2019-01-01 09:55:15,525 [INFO] 回合 126, 步数 659, 奖励 180.0, 总步数 67255\n",
      "2019-01-01 09:55:25,175 [INFO] 训练 16800, 回合 0.9395209000019912, 存储大小 50000, 损失 9.314882845501415e-06\n",
      "2019-01-01 09:56:19,784 [INFO] 训练 16900, 回合 0.939160900002003, 存储大小 50000, 损失 2.1939544240012765e-05\n",
      "2019-01-01 09:56:38,785 [INFO] 回合 127, 步数 612, 奖励 100.0, 总步数 67868\n",
      "2019-01-01 09:57:13,993 [INFO] 训练 17000, 回合 0.9388009000020149, 存储大小 50000, 损失 1.3187011063564569e-05\n",
      "2019-01-01 09:57:40,518 [INFO] 回合 128, 步数 457, 奖励 60.0, 总步数 68326\n",
      "2019-01-01 09:58:07,805 [INFO] 训练 17100, 回合 0.9384409000020267, 存储大小 50000, 损失 1.65113578987075e-05\n",
      "2019-01-01 09:59:02,104 [INFO] 训练 17200, 回合 0.9380809000020386, 存储大小 50000, 损失 0.0018114164704456925\n",
      "2019-01-01 09:59:10,260 [INFO] 回合 129, 步数 664, 奖励 140.0, 总步数 68991\n",
      "2019-01-01 09:59:55,963 [INFO] 训练 17300, 回合 0.9377209000020504, 存储大小 50000, 损失 1.4561039279215038e-05\n",
      "2019-01-01 10:00:20,647 [INFO] 回合 130, 步数 526, 奖励 100.0, 总步数 69518\n",
      "2019-01-01 10:00:49,069 [INFO] 训练 17400, 回合 0.9373609000020623, 存储大小 50000, 损失 1.4193814422469586e-05\n",
      "2019-01-01 10:01:10,063 [INFO] 回合 131, 步数 371, 奖励 20.0, 总步数 69890\n",
      "2019-01-01 10:01:43,189 [INFO] 训练 17500, 回合 0.9370009000020741, 存储大小 50000, 损失 1.9732655346160755e-05\n",
      "2019-01-01 10:02:04,504 [INFO] 回合 132, 步数 400, 奖励 40.0, 总步数 70291\n",
      "2019-01-01 10:02:37,387 [INFO] 训练 17600, 回合 0.936640900002086, 存储大小 50000, 损失 1.7741058400133625e-05\n",
      "2019-01-01 10:02:53,651 [INFO] 回合 133, 步数 364, 奖励 0.0, 总步数 70656\n",
      "2019-01-01 10:03:31,042 [INFO] 训练 17700, 回合 0.9362809000020978, 存储大小 50000, 损失 0.0001756696292432025\n",
      "2019-01-01 10:03:58,139 [INFO] 回合 134, 步数 484, 奖励 80.0, 总步数 71141\n",
      "2019-01-01 10:04:24,694 [INFO] 训练 17800, 回合 0.9359209000021097, 存储大小 50000, 损失 0.00017632590606808662\n",
      "2019-01-01 10:04:57,867 [INFO] 回合 135, 步数 445, 奖励 60.0, 总步数 71587\n",
      "2019-01-01 10:05:18,465 [INFO] 训练 17900, 回合 0.9355609000021216, 存储大小 50000, 损失 1.3305626453075092e-05\n",
      "2019-01-01 10:06:12,772 [INFO] 训练 18000, 回合 0.9352009000021334, 存储大小 50000, 损失 1.5170121514529455e-05\n",
      "2019-01-01 10:06:51,604 [INFO] 回合 136, 步数 844, 奖励 140.0, 总步数 72432\n",
      "2019-01-01 10:07:06,281 [INFO] 训练 18100, 回合 0.9348409000021453, 存储大小 50000, 损失 1.5465633623534814e-05\n",
      "2019-01-01 10:07:41,637 [INFO] 回合 137, 步数 370, 奖励 0.0, 总步数 72803\n",
      "2019-01-01 10:08:00,363 [INFO] 训练 18200, 回合 0.9344809000021571, 存储大小 50000, 损失 1.4634227227361407e-05\n",
      "2019-01-01 10:08:30,107 [INFO] 回合 138, 步数 359, 奖励 0.0, 总步数 73163\n",
      "2019-01-01 10:08:53,243 [INFO] 训练 18300, 回合 0.934120900002169, 存储大小 50000, 损失 9.131146725849248e-06\n",
      "2019-01-01 10:09:46,333 [INFO] 回合 139, 步数 568, 奖励 80.0, 总步数 73732\n",
      "2019-01-01 10:09:47,387 [INFO] 训练 18400, 回合 0.9337609000021808, 存储大小 50000, 损失 0.0018083903705701232\n",
      "2019-01-01 10:10:40,493 [INFO] 训练 18500, 回合 0.9334009000021927, 存储大小 50000, 损失 1.1862895917147398e-05\n",
      "2019-01-01 10:11:00,496 [INFO] 回合 140, 步数 544, 奖励 80.0, 总步数 74277\n",
      "2019-01-01 10:11:38,093 [INFO] 训练 18600, 回合 0.9330409000022045, 存储大小 50000, 损失 1.1489459211588837e-05\n",
      "2019-01-01 10:12:17,397 [INFO] 回合 141, 步数 555, 奖励 140.0, 总步数 74833\n",
      "2019-01-01 10:12:32,749 [INFO] 训练 18700, 回合 0.9326809000022164, 存储大小 50000, 损失 1.2671783224504907e-05\n",
      "2019-01-01 10:13:02,841 [INFO] [测试] 回合 0: 步骤 411, 奖励 0.0, 步数 411\n",
      "2019-01-01 10:13:26,791 [INFO] [测试] 回合 1: 步骤 421, 奖励 60.0, 步数 832\n",
      "2019-01-01 10:13:57,240 [INFO] [测试] 回合 2: 步骤 597, 奖励 0.0, 步数 1429\n",
      "2019-01-01 10:14:22,020 [INFO] [测试] 回合 3: 步骤 562, 奖励 60.0, 步数 1991\n",
      "2019-01-01 10:14:39,744 [INFO] [测试] 回合 4: 步骤 405, 奖励 20.0, 步数 2396\n",
      "2019-01-01 10:15:02,975 [INFO] [测试] 回合 5: 步骤 518, 奖励 60.0, 步数 2914\n",
      "2019-01-01 10:15:18,522 [INFO] [测试] 回合 6: 步骤 360, 奖励 0.0, 步数 3274\n",
      "2019-01-01 10:15:41,663 [INFO] [测试] 回合 7: 步骤 519, 奖励 80.0, 步数 3793\n",
      "2019-01-01 10:16:00,602 [INFO] [测试] 回合 8: 步骤 422, 奖励 0.0, 步数 4215\n",
      "2019-01-01 10:16:24,655 [INFO] [测试] 回合 9: 步骤 557, 奖励 20.0, 步数 4772\n",
      "2019-01-01 10:16:43,733 [INFO] [测试] 回合 10: 步骤 444, 奖励 40.0, 步数 5216\n",
      "2019-01-01 10:17:01,539 [INFO] [测试] 回合 11: 步骤 420, 奖励 20.0, 步数 5636\n",
      "2019-01-01 10:17:21,039 [INFO] [测试] 回合 12: 步骤 435, 奖励 20.0, 步数 6071\n",
      "2019-01-01 10:17:39,498 [INFO] [测试] 回合 13: 步骤 408, 奖励 20.0, 步数 6479\n",
      "2019-01-01 10:18:01,264 [INFO] [测试] 回合 14: 步骤 483, 奖励 0.0, 步数 6962\n",
      "2019-01-01 10:18:22,787 [INFO] [测试] 回合 15: 步骤 479, 奖励 40.0, 步数 7441\n",
      "2019-01-01 10:18:45,448 [INFO] [测试] 回合 16: 步骤 499, 奖励 20.0, 步数 7940\n",
      "2019-01-01 10:19:08,128 [INFO] [测试] 回合 17: 步骤 497, 奖励 60.0, 步数 8437\n",
      "2019-01-01 10:19:34,559 [INFO] [测试] 回合 18: 步骤 597, 奖励 80.0, 步数 9034\n",
      "2019-01-01 10:19:57,338 [INFO] [测试] 回合 19: 步骤 523, 奖励 0.0, 步数 9557\n",
      "2019-01-01 10:20:19,187 [INFO] [测试] 回合 20: 步骤 516, 奖励 60.0, 步数 10073\n",
      "2019-01-01 10:20:36,552 [INFO] [测试] 回合 21: 步骤 411, 奖励 20.0, 步数 10484\n",
      "2019-01-01 10:20:55,659 [INFO] [测试] 回合 22: 步骤 435, 奖励 20.0, 步数 10919\n",
      "2019-01-01 10:21:19,644 [INFO] [测试] 回合 23: 步骤 540, 奖励 60.0, 步数 11459\n",
      "2019-01-01 10:21:40,992 [INFO] [测试] 回合 24: 步骤 491, 奖励 60.0, 步数 11950\n",
      "2019-01-01 10:22:09,237 [INFO] [测试] 回合 25: 步骤 629, 奖励 20.0, 步数 12579\n",
      "2019-01-01 10:22:32,822 [INFO] [测试] 回合 26: 步骤 535, 奖励 80.0, 步数 13114\n",
      "2019-01-01 10:22:46,989 [INFO] [测试] 回合 27: 步骤 328, 奖励 0.0, 步数 13442\n",
      "2019-01-01 10:23:04,692 [INFO] [测试] 回合 28: 步骤 410, 奖励 60.0, 步数 13852\n",
      "2019-01-01 10:23:23,697 [INFO] [测试] 回合 29: 步骤 439, 奖励 60.0, 步数 14291\n",
      "2019-01-01 10:23:55,781 [INFO] [测试] 回合 30: 步骤 734, 奖励 0.0, 步数 15025\n",
      "2019-01-01 10:24:18,085 [INFO] [测试] 回合 31: 步骤 516, 奖励 20.0, 步数 15541\n",
      "2019-01-01 10:24:39,291 [INFO] [测试] 回合 32: 步骤 482, 奖励 40.0, 步数 16023\n",
      "2019-01-01 10:24:54,917 [INFO] [测试] 回合 33: 步骤 362, 奖励 0.0, 步数 16385\n",
      "2019-01-01 10:25:13,574 [INFO] [测试] 回合 34: 步骤 435, 奖励 40.0, 步数 16820\n",
      "2019-01-01 10:25:37,068 [INFO] [测试] 回合 35: 步骤 531, 奖励 120.0, 步数 17351\n",
      "2019-01-01 10:25:55,898 [INFO] [测试] 回合 36: 步骤 410, 奖励 0.0, 步数 17761\n",
      "2019-01-01 10:26:14,324 [INFO] [测试] 回合 37: 步骤 394, 奖励 0.0, 步数 18155\n",
      "2019-01-01 10:26:37,132 [INFO] [测试] 回合 38: 步骤 514, 奖励 60.0, 步数 18669\n",
      "2019-01-01 10:26:51,002 [INFO] [测试] 回合 39: 步骤 326, 奖励 0.0, 步数 18995\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2019-01-01 10:27:07,081 [INFO] [测试] 回合 40: 步骤 328, 奖励 20.0, 步数 19323\n",
      "2019-01-01 10:27:26,288 [INFO] [测试] 回合 41: 步骤 410, 奖励 60.0, 步数 19733\n",
      "2019-01-01 10:27:45,515 [INFO] [测试] 回合 42: 步骤 435, 奖励 60.0, 步数 20168\n",
      "2019-01-01 10:28:09,252 [INFO] [测试] 回合 43: 步骤 538, 奖励 80.0, 步数 20706\n",
      "2019-01-01 10:28:22,799 [INFO] [测试] 回合 44: 步骤 328, 奖励 0.0, 步数 21034\n",
      "2019-01-01 10:28:43,268 [INFO] [测试] 回合 45: 步骤 459, 奖励 60.0, 步数 21493\n",
      "2019-01-01 10:29:00,666 [INFO] [测试] 回合 46: 步骤 418, 奖励 40.0, 步数 21911\n",
      "2019-01-01 10:29:17,009 [INFO] [测试] 回合 47: 步骤 403, 奖励 0.0, 步数 22314\n",
      "2019-01-01 10:29:38,776 [INFO] [测试] 回合 48: 步骤 507, 奖励 60.0, 步数 22821\n",
      "2019-01-01 10:29:58,297 [INFO] [测试] 回合 49: 步骤 477, 奖励 0.0, 步数 23298\n",
      "2019-01-01 10:29:58,299 [INFO] [测试小结] 步数: 平均 = 465.96, 最小 = 326, 最大 = 734.\n",
      "2019-01-01 10:29:58,302 [INFO] [测试小结] 奖励: 平均 = 34.0, 最小 = 0.0, 最大 = 120.0\n",
      "2019-01-01 10:29:58,333 [INFO] 回合 142, 步数 168, 奖励 0.0, 总步数 75002\n",
      "2019-01-01 10:30:37,557 [INFO] 训练 18800, 回合 0.9323209000022282, 存储大小 50000, 损失 8.581890142522752e-06\n",
      "2019-01-01 10:30:57,439 [INFO] 回合 143, 步数 519, 奖励 100.0, 总步数 75522\n",
      "2019-01-01 10:31:22,253 [INFO] 训练 18900, 回合 0.9319609000022401, 存储大小 50000, 损失 9.333452908322215e-06\n",
      "2019-01-01 10:32:07,269 [INFO] 训练 19000, 回合 0.9316009000022519, 存储大小 50000, 损失 2.0385979951242916e-05\n",
      "2019-01-01 10:32:53,133 [INFO] 训练 19100, 回合 0.9312409000022638, 存储大小 50000, 损失 2.1697966076317243e-05\n",
      "2019-01-01 10:33:01,152 [INFO] 回合 144, 步数 1094, 奖励 220.0, 总步数 76617\n",
      "2019-01-01 10:33:38,110 [INFO] 训练 19200, 回合 0.9308809000022756, 存储大小 50000, 损失 1.1400199582567438e-05\n",
      "2019-01-01 10:33:42,731 [INFO] 回合 145, 步数 369, 奖励 20.0, 总步数 76987\n",
      "2019-01-01 10:34:23,095 [INFO] 训练 19300, 回合 0.9305209000022875, 存储大小 50000, 损失 1.863478246377781e-05\n",
      "2019-01-01 10:34:24,953 [INFO] 回合 146, 步数 377, 奖励 20.0, 总步数 77365\n",
      "2019-01-01 10:35:08,062 [INFO] 训练 19400, 回合 0.9301609000022993, 存储大小 50000, 损失 1.2556743968161754e-05\n",
      "2019-01-01 10:35:08,127 [INFO] 回合 147, 步数 384, 奖励 0.0, 总步数 77750\n",
      "2019-01-01 10:35:52,520 [INFO] 回合 148, 步数 396, 奖励 20.0, 总步数 78147\n",
      "2019-01-01 10:35:52,949 [INFO] 训练 19500, 回合 0.9298009000023112, 存储大小 50000, 损失 1.0922338333330117e-05\n",
      "2019-01-01 10:36:35,134 [INFO] 回合 149, 步数 381, 奖励 20.0, 总步数 78529\n",
      "2019-01-01 10:36:37,766 [INFO] 训练 19600, 回合 0.929440900002323, 存储大小 50000, 损失 9.829605005506892e-06\n",
      "2019-01-01 10:37:25,181 [INFO] 训练 19700, 回合 0.9290809000023349, 存储大小 50000, 损失 0.0017652662936598063\n",
      "2019-01-01 10:37:44,619 [INFO] 回合 150, 步数 594, 奖励 60.0, 总步数 79124\n",
      "2019-01-01 10:38:09,994 [INFO] 训练 19800, 回合 0.9287209000023467, 存储大小 50000, 损失 7.311234639928443e-06\n",
      "2019-01-01 10:38:55,384 [INFO] 训练 19900, 回合 0.9283609000023586, 存储大小 50000, 损失 1.1068569619965274e-05\n",
      "2019-01-01 10:39:07,173 [INFO] 回合 151, 步数 733, 奖励 140.0, 总步数 79858\n",
      "2019-01-01 10:39:40,740 [INFO] 训练 20000, 回合 0.9280009000023705, 存储大小 50000, 损失 0.0016739361453801394\n",
      "2019-01-01 10:39:40,798 [INFO] 目标网络已更新\n",
      "2019-01-01 10:40:08,407 [INFO] 回合 152, 步数 538, 奖励 80.0, 总步数 80397\n",
      "2019-01-01 10:40:26,504 [INFO] 训练 20100, 回合 0.9276409000023823, 存储大小 50000, 损失 1.0070200005429797e-05\n",
      "2019-01-01 10:41:10,913 [INFO] 回合 153, 步数 554, 奖励 100.0, 总步数 80952\n",
      "2019-01-01 10:41:11,324 [INFO] 训练 20200, 回合 0.9272809000023942, 存储大小 50000, 损失 1.0955620382446796e-05\n",
      "2019-01-01 10:41:56,054 [INFO] 训练 20300, 回合 0.926920900002406, 存储大小 50000, 损失 1.6702309949323535e-05\n",
      "2019-01-01 10:42:39,581 [INFO] 回合 154, 步数 792, 奖励 160.0, 总步数 81745\n",
      "2019-01-01 10:42:40,916 [INFO] 训练 20400, 回合 0.9265609000024179, 存储大小 50000, 损失 2.3484535631723702e-05\n",
      "2019-01-01 10:43:26,226 [INFO] 训练 20500, 回合 0.9262009000024297, 存储大小 50000, 损失 1.075223735824693e-05\n",
      "2019-01-01 10:43:34,727 [INFO] 回合 155, 步数 488, 奖励 120.0, 总步数 82234\n",
      "2019-01-01 10:44:11,003 [INFO] 训练 20600, 回合 0.9258409000024416, 存储大小 50000, 损失 1.8089747754856944e-05\n",
      "2019-01-01 10:44:23,321 [INFO] 回合 156, 步数 433, 奖励 60.0, 总步数 82668\n",
      "2019-01-01 10:44:55,998 [INFO] 训练 20700, 回合 0.9254809000024534, 存储大小 50000, 损失 9.706690434541088e-06\n",
      "2019-01-01 10:45:40,580 [INFO] 训练 20800, 回合 0.9251209000024653, 存储大小 50000, 损失 0.0017256529536098242\n",
      "2019-01-01 10:45:41,186 [INFO] 回合 157, 步数 696, 奖励 140.0, 总步数 83365\n",
      "2019-01-01 10:46:24,681 [INFO] 训练 20900, 回合 0.9247609000024771, 存储大小 50000, 损失 1.0859655958483927e-05\n",
      "2019-01-01 10:46:34,138 [INFO] 回合 158, 步数 477, 奖励 100.0, 总步数 83843\n",
      "2019-01-01 10:47:09,220 [INFO] 训练 21000, 回合 0.924400900002489, 存储大小 50000, 损失 0.0017757797613739967\n",
      "2019-01-01 10:47:25,409 [INFO] 回合 159, 步数 461, 奖励 80.0, 总步数 84305\n",
      "2019-01-01 10:47:54,158 [INFO] 训练 21100, 回合 0.9240409000025008, 存储大小 50000, 损失 1.0345076589146629e-05\n",
      "2019-01-01 10:48:36,296 [INFO] 回合 160, 步数 633, 奖励 80.0, 总步数 84939\n",
      "2019-01-01 10:48:39,367 [INFO] 训练 21200, 回合 0.9236809000025127, 存储大小 50000, 损失 1.4067729352973402e-05\n",
      "2019-01-01 10:49:31,032 [INFO] 训练 21300, 回合 0.9233209000025245, 存储大小 50000, 损失 2.3632628654013388e-05\n",
      "2019-01-01 10:50:01,661 [INFO] 回合 161, 步数 654, 奖励 80.0, 总步数 85594\n",
      "2019-01-01 10:50:23,766 [INFO] 训练 21400, 回合 0.9229609000025364, 存储大小 50000, 损失 1.5785248251631856e-05\n",
      "2019-01-01 10:51:16,498 [INFO] 训练 21500, 回合 0.9226009000025482, 存储大小 50000, 损失 9.076366950466763e-06\n",
      "2019-01-01 10:51:19,021 [INFO] 回合 162, 步数 588, 奖励 80.0, 总步数 86183\n",
      "2019-01-01 10:52:08,964 [INFO] 训练 21600, 回合 0.9222409000025601, 存储大小 50000, 损失 1.9594008335843682e-05\n",
      "2019-01-01 10:52:31,192 [INFO] 回合 163, 步数 544, 奖励 100.0, 总步数 86728\n",
      "2019-01-01 10:53:04,999 [INFO] 训练 21700, 回合 0.921880900002572, 存储大小 50000, 损失 8.564382369513623e-06\n",
      "2019-01-01 10:53:53,392 [INFO] 回合 164, 步数 569, 奖励 100.0, 总步数 87298\n",
      "2019-01-01 10:54:02,819 [INFO] 训练 21800, 回合 0.9215209000025838, 存储大小 50000, 损失 1.1019672456313856e-05\n",
      "2019-01-01 10:55:00,966 [INFO] 回合 165, 步数 466, 奖励 60.0, 总步数 87765\n",
      "2019-01-01 10:55:01,473 [INFO] 训练 21900, 回合 0.9211609000025957, 存储大小 50000, 损失 1.651890306675341e-05\n",
      "2019-01-01 10:55:53,318 [INFO] 回合 166, 步数 371, 奖励 20.0, 总步数 88137\n",
      "2019-01-01 10:55:57,485 [INFO] 训练 22000, 回合 0.9208009000026075, 存储大小 50000, 损失 7.201361313491361e-06\n",
      "2019-01-01 10:56:53,803 [INFO] 训练 22100, 回合 0.9204409000026194, 存储大小 50000, 损失 1.4444957741943654e-05\n",
      "2019-01-01 10:57:03,972 [INFO] 回合 167, 步数 508, 奖励 60.0, 总步数 88646\n",
      "2019-01-01 10:57:47,292 [INFO] 训练 22200, 回合 0.9200809000026312, 存储大小 50000, 损失 1.3615252100862563e-05\n",
      "2019-01-01 10:58:06,582 [INFO] 回合 168, 步数 469, 奖励 100.0, 总步数 89116\n",
      "2019-01-01 10:58:40,363 [INFO] 训练 22300, 回合 0.9197209000026431, 存储大小 50000, 损失 0.0017723665805533528\n",
      "2019-01-01 10:59:15,299 [INFO] 回合 169, 步数 516, 奖励 100.0, 总步数 89633\n",
      "2019-01-01 10:59:33,735 [INFO] 训练 22400, 回合 0.9193609000026549, 存储大小 50000, 损失 0.0016578794457018375\n",
      "2019-01-01 11:00:24,318 [INFO] 回合 170, 步数 517, 奖励 100.0, 总步数 90151\n",
      "2019-01-01 11:00:26,972 [INFO] 训练 22500, 回合 0.9190009000026668, 存储大小 50000, 损失 1.618674286874011e-05\n",
      "2019-01-01 11:01:20,789 [INFO] 训练 22600, 回合 0.9186409000026786, 存储大小 50000, 损失 1.0155603376915678e-05\n",
      "2019-01-01 11:01:36,595 [INFO] 回合 171, 步数 543, 奖励 120.0, 总步数 90695\n",
      "2019-01-01 11:02:13,642 [INFO] 训练 22700, 回合 0.9182809000026905, 存储大小 50000, 损失 1.1360957614670042e-05\n",
      "2019-01-01 11:03:06,360 [INFO] 训练 22800, 回合 0.9179209000027023, 存储大小 50000, 损失 1.5755937056383118e-05\n",
      "2019-01-01 11:03:16,766 [INFO] 回合 172, 步数 755, 奖励 80.0, 总步数 91451\n",
      "2019-01-01 11:04:01,024 [INFO] 训练 22900, 回合 0.9175609000027142, 存储大小 50000, 损失 2.0242332539055496e-05\n",
      "2019-01-01 11:04:57,616 [INFO] 训练 23000, 回合 0.917200900002726, 存储大小 50000, 损失 1.748383692756761e-05\n",
      "2019-01-01 11:05:43,311 [INFO] 回合 173, 步数 1066, 奖励 260.0, 总步数 92518\n",
      "2019-01-01 11:05:50,783 [INFO] 训练 23100, 回合 0.9168409000027379, 存储大小 50000, 损失 1.8730948795564473e-05\n",
      "2019-01-01 11:06:45,264 [INFO] 训练 23200, 回合 0.9164809000027497, 存储大小 50000, 损失 2.74117301160004e-05\n",
      "2019-01-01 11:06:50,165 [INFO] 回合 174, 步数 495, 奖励 0.0, 总步数 93014\n",
      "2019-01-01 11:07:38,261 [INFO] 训练 23300, 回合 0.9161209000027616, 存储大小 50000, 损失 1.0518229828448966e-05\n",
      "2019-01-01 11:08:03,083 [INFO] 回合 175, 步数 549, 奖励 60.0, 总步数 93564\n",
      "2019-01-01 11:08:31,243 [INFO] 训练 23400, 回合 0.9157609000027734, 存储大小 50000, 损失 0.001739619649015367\n",
      "2019-01-01 11:08:51,159 [INFO] 回合 176, 步数 363, 奖励 20.0, 总步数 93928\n",
      "2019-01-01 11:09:24,188 [INFO] 训练 23500, 回合 0.9154009000027853, 存储大小 50000, 损失 0.0017439161892980337\n",
      "2019-01-01 11:10:08,858 [INFO] 回合 177, 步数 581, 奖励 100.0, 总步数 94510\n",
      "2019-01-01 11:10:18,680 [INFO] 训练 23600, 回合 0.9150409000027971, 存储大小 50000, 损失 1.8818154785549268e-05\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2019-01-01 11:11:15,904 [INFO] 训练 23700, 回合 0.914680900002809, 存储大小 50000, 损失 1.3763721653958783e-05\n",
      "2019-01-01 11:11:33,484 [INFO] 回合 178, 步数 602, 奖励 80.0, 总步数 95113\n",
      "2019-01-01 11:12:07,915 [INFO] 训练 23800, 回合 0.9143209000028208, 存储大小 50000, 损失 0.00019114372844342142\n",
      "2019-01-01 11:12:54,305 [INFO] 回合 179, 步数 619, 奖励 120.0, 总步数 95733\n",
      "2019-01-01 11:13:00,521 [INFO] 训练 23900, 回合 0.9139609000028327, 存储大小 50000, 损失 1.2359452739474364e-05\n",
      "2019-01-01 11:13:53,473 [INFO] 训练 24000, 回合 0.9136009000028446, 存储大小 50000, 损失 1.3050831512373406e-05\n",
      "2019-01-01 11:13:53,536 [INFO] 目标网络已更新\n",
      "2019-01-01 11:14:27,230 [INFO] 回合 180, 步数 703, 奖励 100.0, 总步数 96437\n",
      "2019-01-01 11:14:46,153 [INFO] 训练 24100, 回合 0.9132409000028564, 存储大小 50000, 损失 1.308241280639777e-05\n",
      "2019-01-01 11:15:40,715 [INFO] 训练 24200, 回合 0.9128809000028683, 存储大小 50000, 损失 2.701804623939097e-05\n",
      "2019-01-01 11:15:43,905 [INFO] 回合 181, 步数 570, 奖励 120.0, 总步数 97008\n",
      "2019-01-01 11:16:35,659 [INFO] 训练 24300, 回合 0.9125209000028801, 存储大小 50000, 损失 0.001846031635068357\n",
      "2019-01-01 11:17:11,205 [INFO] 回合 182, 步数 641, 奖励 100.0, 总步数 97650\n",
      "2019-01-01 11:17:29,028 [INFO] 训练 24400, 回合 0.912160900002892, 存储大小 50000, 损失 2.9202434234321117e-05\n",
      "2019-01-01 11:18:18,622 [INFO] 回合 183, 步数 506, 奖励 100.0, 总步数 98157\n",
      "2019-01-01 11:18:22,182 [INFO] 训练 24500, 回合 0.9118009000029038, 存储大小 50000, 损失 1.1557314792298712e-05\n",
      "2019-01-01 11:19:15,079 [INFO] 训练 24600, 回合 0.9114409000029157, 存储大小 50000, 损失 1.1824226021417417e-05\n",
      "2019-01-01 11:19:40,707 [INFO] 回合 184, 步数 620, 奖励 120.0, 总步数 98778\n",
      "2019-01-01 11:20:08,038 [INFO] 训练 24700, 回合 0.9110809000029275, 存储大小 50000, 损失 1.4322147762868553e-05\n",
      "2019-01-01 11:20:30,742 [INFO] 回合 185, 步数 377, 奖励 20.0, 总步数 99156\n",
      "2019-01-01 11:21:00,685 [INFO] 训练 24800, 回合 0.9107209000029394, 存储大小 50000, 损失 1.0957536687783431e-05\n",
      "2019-01-01 11:21:52,749 [INFO] 训练 24900, 回合 0.9103609000029512, 存储大小 50000, 损失 1.8067283235723153e-05\n",
      "2019-01-01 11:22:00,349 [INFO] 回合 186, 步数 688, 奖励 100.0, 总步数 99845\n",
      "2019-01-01 11:23:01,780 [INFO] [测试] 回合 0: 步骤 819, 奖励 160.0, 步数 819\n",
      "2019-01-01 11:23:22,428 [INFO] [测试] 回合 1: 步骤 458, 奖励 80.0, 步数 1277\n",
      "2019-01-01 11:23:53,628 [INFO] [测试] 回合 2: 步骤 659, 奖励 180.0, 步数 1936\n",
      "2019-01-01 11:24:21,014 [INFO] [测试] 回合 3: 步骤 595, 奖励 120.0, 步数 2531\n",
      "2019-01-01 11:24:47,618 [INFO] [测试] 回合 4: 步骤 560, 奖励 100.0, 步数 3091\n",
      "2019-01-01 11:25:32,702 [INFO] [测试] 回合 5: 步骤 952, 奖励 280.0, 步数 4043\n",
      "2019-01-01 11:26:04,781 [INFO] [测试] 回合 6: 步骤 696, 奖励 160.0, 步数 4739\n",
      "2019-01-01 11:26:34,861 [INFO] [测试] 回合 7: 步骤 639, 奖励 180.0, 步数 5378\n",
      "2019-01-01 11:27:25,162 [INFO] [测试] 回合 8: 步骤 1035, 奖励 260.0, 步数 6413\n",
      "2019-01-01 11:27:54,584 [INFO] [测试] 回合 9: 步骤 633, 奖励 180.0, 步数 7046\n",
      "2019-01-01 11:28:38,711 [INFO] [测试] 回合 10: 步骤 938, 奖励 260.0, 步数 7984\n",
      "2019-01-01 11:29:26,229 [INFO] [测试] 回合 11: 步骤 991, 奖励 280.0, 步数 8975\n",
      "2019-01-01 11:29:56,372 [INFO] [测试] 回合 12: 步骤 623, 奖励 140.0, 步数 9598\n",
      "2019-01-01 11:30:41,918 [INFO] [测试] 回合 13: 步骤 945, 奖励 200.0, 步数 10543\n",
      "2019-01-01 11:31:18,416 [INFO] [测试] 回合 14: 步骤 776, 奖励 220.0, 步数 11319\n",
      "2019-01-01 11:32:08,148 [INFO] [测试] 回合 15: 步骤 1040, 奖励 260.0, 步数 12359\n",
      "2019-01-01 11:32:40,994 [INFO] [测试] 回合 16: 步骤 711, 奖励 240.0, 步数 13070\n",
      "2019-01-01 11:33:21,085 [INFO] [测试] 回合 17: 步骤 840, 奖励 180.0, 步数 13910\n",
      "2019-01-01 11:34:14,298 [INFO] [测试] 回合 18: 步骤 1098, 奖励 340.0, 步数 15008\n",
      "2019-01-01 11:34:42,422 [INFO] [测试] 回合 19: 步骤 604, 奖励 120.0, 步数 15612\n",
      "2019-01-01 11:35:15,460 [INFO] [测试] 回合 20: 步骤 691, 奖励 160.0, 步数 16303\n",
      "2019-01-01 11:35:47,666 [INFO] [测试] 回合 21: 步骤 674, 奖励 140.0, 步数 16977\n",
      "2019-01-01 11:36:11,547 [INFO] [测试] 回合 22: 步骤 506, 奖励 120.0, 步数 17483\n",
      "2019-01-01 11:36:57,816 [INFO] [测试] 回合 23: 步骤 988, 奖励 160.0, 步数 18471\n",
      "2019-01-01 11:37:41,288 [INFO] [测试] 回合 24: 步骤 882, 奖励 280.0, 步数 19353\n",
      "2019-01-01 11:38:09,683 [INFO] [测试] 回合 25: 步骤 584, 奖励 100.0, 步数 19937\n",
      "2019-01-01 11:38:39,306 [INFO] [测试] 回合 26: 步骤 624, 奖励 140.0, 步数 20561\n",
      "2019-01-01 11:39:04,544 [INFO] [测试] 回合 27: 步骤 549, 奖励 120.0, 步数 21110\n",
      "2019-01-01 11:39:51,299 [INFO] [测试] 回合 28: 步骤 985, 奖励 240.0, 步数 22095\n",
      "2019-01-01 11:40:28,204 [INFO] [测试] 回合 29: 步骤 803, 奖励 220.0, 步数 22898\n",
      "2019-01-01 11:40:51,394 [INFO] [测试] 回合 30: 步骤 515, 奖励 80.0, 步数 23413\n",
      "2019-01-01 11:41:21,314 [INFO] [测试] 回合 31: 步骤 750, 奖励 180.0, 步数 24163\n",
      "2019-01-01 11:42:13,558 [INFO] [测试] 回合 32: 步骤 1380, 奖励 360.0, 步数 25543\n",
      "2019-01-01 11:42:40,871 [INFO] [测试] 回合 33: 步骤 714, 奖励 200.0, 步数 26257\n",
      "2019-01-01 11:43:12,962 [INFO] [测试] 回合 34: 步骤 872, 奖励 260.0, 步数 27129\n",
      "2019-01-01 11:43:37,021 [INFO] [测试] 回合 35: 步骤 656, 奖励 140.0, 步数 27785\n",
      "2019-01-01 11:43:58,703 [INFO] [测试] 回合 36: 步骤 587, 奖励 160.0, 步数 28372\n",
      "2019-01-01 11:44:14,777 [INFO] [测试] 回合 37: 步骤 451, 奖励 120.0, 步数 28823\n",
      "2019-01-01 11:44:51,272 [INFO] [测试] 回合 38: 步骤 1004, 奖励 280.0, 步数 29827\n",
      "2019-01-01 11:45:17,644 [INFO] [测试] 回合 39: 步骤 738, 奖励 140.0, 步数 30565\n",
      "2019-01-01 11:45:42,050 [INFO] [测试] 回合 40: 步骤 658, 奖励 160.0, 步数 31223\n",
      "2019-01-01 11:46:27,754 [INFO] [测试] 回合 41: 步骤 1230, 奖励 320.0, 步数 32453\n",
      "2019-01-01 11:46:55,174 [INFO] [测试] 回合 42: 步骤 737, 奖励 160.0, 步数 33190\n",
      "2019-01-01 11:47:14,561 [INFO] [测试] 回合 43: 步骤 515, 奖励 120.0, 步数 33705\n",
      "2019-01-01 11:47:47,826 [INFO] [测试] 回合 44: 步骤 891, 奖励 260.0, 步数 34596\n",
      "2019-01-01 11:48:07,994 [INFO] [测试] 回合 45: 步骤 557, 奖励 100.0, 步数 35153\n",
      "2019-01-01 11:48:26,826 [INFO] [测试] 回合 46: 步骤 512, 奖励 80.0, 步数 35665\n",
      "2019-01-01 11:49:04,698 [INFO] [测试] 回合 47: 步骤 1026, 奖励 380.0, 步数 36691\n",
      "2019-01-01 11:49:35,177 [INFO] [测试] 回合 48: 步骤 798, 奖励 180.0, 步数 37489\n",
      "2019-01-01 11:50:04,967 [INFO] [测试] 回合 49: 步骤 813, 奖励 200.0, 步数 38302\n",
      "2019-01-01 11:50:04,970 [INFO] [测试小结] 步数: 平均 = 766.04, 最小 = 451, 最大 = 1380.\n",
      "2019-01-01 11:50:04,976 [INFO] [测试小结] 奖励: 平均 = 190.0, 最小 = 80.0, 最大 = 380.0\n",
      "2019-01-01 11:50:05,043 [INFO] 回合 187, 步数 156, 奖励 40.0, 总步数 100002\n",
      "2019-01-01 11:50:05,051 [INFO] 训练结束\n"
     ]
    }
   ],
   "source": [
    "agent = DQNAgent(env, input_shape=input_shape, batch_size=batch_size,\n",
    "        replay_memory_size=replay_memory_size,\n",
    "        learning_rate=learning_rate, gamma=gamma,\n",
    "        epsilon=epsilon, epsilon_decrease_rate=epsilon_decrease,\n",
    "        min_epsilon=min_epsilon, random_inital_steps=random_inital_steps,\n",
    "        load_path=load_path,\n",
    "        update_freq=update_freq,\n",
    "        target_network_update_freq=target_network_update_freq)\n",
    "\n",
    "logging.info(\"训练开始\")\n",
    "\n",
    "# Training loop.\n",
    "# `frame` advances once per environment step plus once more when an episode\n",
    "# finishes; training stops at the first episode boundary where it exceeds\n",
    "# `frames`.\n",
    "#\n",
    "# Evaluation is handed the same `env` object that training is stepping, so it\n",
    "# must not run in the middle of a training episode: afterwards the emulator\n",
    "# no longer holds the interrupted game, the episode gets cut short, and\n",
    "# agent.learn() receives a transition whose state and next_state come from\n",
    "# two unrelated games. Evaluation is therefore deferred to the first episode\n",
    "# boundary at or after each multiple of `test_freq`.\n",
    "frame = 0\n",
    "next_test_frame = test_freq  # frame count at which the next evaluation is due\n",
    "max_mean_episode_reward = float(\"-inf\")\n",
    "for episode in itertools.count():\n",
    "    observation = env.reset()\n",
    "    episode_reward = 0\n",
    "    # Passing None as the previous state starts a new state for this episode.\n",
    "    state = agent.get_next_state(None, observation)\n",
    "    for step in itertools.count():\n",
    "        if render:\n",
    "            env.render()\n",
    "        frame += 1\n",
    "        action = agent.decide(state, step=step)\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        next_state = agent.get_next_state(state, observation)\n",
    "        episode_reward += reward\n",
    "        agent.learn(state, action, reward, next_state, done)\n",
    "\n",
    "        if done:\n",
    "            step += 1  # turn the 0-based loop index into a step count for the log\n",
    "            frame += 1\n",
    "            break\n",
    "        state = next_state\n",
    "\n",
    "    logging.info(\"回合 {}, 步数 {}, 奖励 {}, 总步数 {}\".format(\n",
    "            episode, step, episode_reward, frame))\n",
    "\n",
    "    # Validation: runs between episodes only, so it cannot disturb a training\n",
    "    # episode that is still in progress.\n",
    "    if frame >= next_test_frame:\n",
    "        # Schedule the next evaluation at the next multiple of test_freq.\n",
    "        next_test_frame = (frame // test_freq + 1) * test_freq\n",
    "        test_episode_rewards = test(env=env,\n",
    "                agent=agent, episodes=test_episodes, render=render)\n",
    "        mean_test_reward = np.mean(test_episode_rewards)\n",
    "        if max_mean_episode_reward < mean_test_reward:\n",
    "            # New best mean evaluation reward: overwrite the main checkpoint\n",
    "            # and also keep a copy tagged with the agent's fit count.\n",
    "            max_mean_episode_reward = mean_test_reward\n",
    "            agent.save_network(save_path)\n",
    "            path = (os.path.splitext(save_path)[0] + '.' +\n",
    "                    str(agent.fit_count) + '.h5')\n",
    "            agent.save_network(path)\n",
    "\n",
    "    if frame > frames:\n",
    "        break\n",
    "\n",
    "logging.info(\"训练结束\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2019-01-01 11:50:05,601 [INFO] 载入网络权重 ./output/SeaquestDeterministic-v4-20190124-071404/model.h5.\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "permute_2 (Permute)          (None, 110, 84, 4)        0         \n",
      "_________________________________________________________________\n",
      "conv2d_6 (Conv2D)            (None, 26, 20, 32)        8224      \n",
      "_________________________________________________________________\n",
      "conv2d_7 (Conv2D)            (None, 12, 9, 64)         32832     \n",
      "_________________________________________________________________\n",
      "conv2d_8 (Conv2D)            (None, 10, 7, 64)         36928     \n",
      "_________________________________________________________________\n",
      "flatten_2 (Flatten)          (None, 4480)              0         \n",
      "_________________________________________________________________\n",
      "dense_4 (Dense)              (None, 512)               2294272   \n",
      "_________________________________________________________________\n",
      "dense_5 (Dense)              (None, 18)                9234      \n",
      "=================================================================\n",
      "Total params: 2,381,490\n",
      "Trainable params: 2,381,490\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "2019-01-01 11:50:07,345 [INFO] 目标网络已更新\n",
      "2019-01-01 11:50:31,999 [INFO] [测试] 回合 0: 步骤 665, 奖励 160.0, 步数 665\n",
      "2019-01-01 11:51:09,149 [INFO] [测试] 回合 1: 步骤 1028, 奖励 260.0, 步数 1693\n",
      "2019-01-01 11:51:54,676 [INFO] [测试] 回合 2: 步骤 1246, 奖励 380.0, 步数 2939\n",
      "2019-01-01 11:52:29,851 [INFO] [测试] 回合 3: 步骤 987, 奖励 260.0, 步数 3926\n",
      "2019-01-01 11:53:13,409 [INFO] [测试] 回合 4: 步骤 1181, 奖励 320.0, 步数 5107\n",
      "2019-01-01 11:53:31,278 [INFO] [测试] 回合 5: 步骤 505, 奖励 80.0, 步数 5612\n",
      "2019-01-01 11:53:58,749 [INFO] [测试] 回合 6: 步骤 731, 奖励 120.0, 步数 6343\n",
      "2019-01-01 11:54:27,730 [INFO] [测试] 回合 7: 步骤 790, 奖励 240.0, 步数 7133\n",
      "2019-01-01 11:54:59,252 [INFO] [测试] 回合 8: 步骤 844, 奖励 180.0, 步数 7977\n",
      "2019-01-01 11:55:28,158 [INFO] [测试] 回合 9: 步骤 812, 奖励 200.0, 步数 8789\n",
      "2019-01-01 11:55:59,027 [INFO] [测试] 回合 10: 步骤 854, 奖励 260.0, 步数 9643\n",
      "2019-01-01 11:56:26,032 [INFO] [测试] 回合 11: 步骤 731, 奖励 120.0, 步数 10374\n",
      "2019-01-01 11:56:45,539 [INFO] [测试] 回合 12: 步骤 534, 奖励 140.0, 步数 10908\n",
      "2019-01-01 11:57:24,723 [INFO] [测试] 回合 13: 步骤 1039, 奖励 260.0, 步数 11947\n",
      "2019-01-01 11:57:41,533 [INFO] [测试] 回合 14: 步骤 466, 奖励 100.0, 步数 12413\n",
      "2019-01-01 11:58:07,592 [INFO] [测试] 回合 15: 步骤 732, 奖励 140.0, 步数 13145\n",
      "2019-01-01 11:58:26,492 [INFO] [测试] 回合 16: 步骤 515, 奖励 120.0, 步数 13660\n",
      "2019-01-01 11:58:49,083 [INFO] [测试] 回合 17: 步骤 601, 奖励 140.0, 步数 14261\n",
      "2019-01-01 11:59:17,531 [INFO] [测试] 回合 18: 步骤 769, 奖励 200.0, 步数 15030\n",
      "2019-01-01 11:59:41,222 [INFO] [测试] 回合 19: 步骤 653, 奖励 120.0, 步数 15683\n",
      "2019-01-01 12:00:04,969 [INFO] [测试] 回合 20: 步骤 648, 奖励 180.0, 步数 16331\n",
      "2019-01-01 12:00:30,490 [INFO] [测试] 回合 21: 步骤 690, 奖励 140.0, 步数 17021\n",
      "2019-01-01 12:00:51,854 [INFO] [测试] 回合 22: 步骤 576, 奖励 160.0, 步数 17597\n",
      "2019-01-01 12:01:15,574 [INFO] [测试] 回合 23: 步骤 659, 奖励 160.0, 步数 18256\n",
      "2019-01-01 12:01:54,393 [INFO] [测试] 回合 24: 步骤 1054, 奖励 260.0, 步数 19310\n",
      "2019-01-01 12:02:30,929 [INFO] [测试] 回合 25: 步骤 988, 奖励 260.0, 步数 20298\n",
      "2019-01-01 12:02:54,174 [INFO] [测试] 回合 26: 步骤 640, 奖励 220.0, 步数 20938\n",
      "2019-01-01 12:03:15,199 [INFO] [测试] 回合 27: 步骤 584, 奖励 100.0, 步数 21522\n",
      "2019-01-01 12:03:41,783 [INFO] [测试] 回合 28: 步骤 731, 奖励 260.0, 步数 22253\n",
      "2019-01-01 12:04:19,687 [INFO] [测试] 回合 29: 步骤 1035, 奖励 280.0, 步数 23288\n",
      "2019-01-01 12:04:36,251 [INFO] [测试] 回合 30: 步骤 466, 奖励 80.0, 步数 23754\n",
      "2019-01-01 12:05:00,826 [INFO] [测试] 回合 31: 步骤 656, 奖励 120.0, 步数 24410\n",
      "2019-01-01 12:05:35,989 [INFO] [测试] 回合 32: 步骤 933, 奖励 240.0, 步数 25343\n",
      "2019-01-01 12:06:23,334 [INFO] [测试] 回合 33: 步骤 1286, 奖励 360.0, 步数 26629\n",
      "2019-01-01 12:07:19,554 [INFO] [测试] 回合 34: 步骤 1527, 奖励 440.0, 步数 28156\n",
      "2019-01-01 12:07:41,091 [INFO] [测试] 回合 35: 步骤 594, 奖励 160.0, 步数 28750\n",
      "2019-01-01 12:08:18,880 [INFO] [测试] 回合 36: 步骤 1028, 奖励 220.0, 步数 29778\n",
      "2019-01-01 12:08:49,579 [INFO] [测试] 回合 37: 步骤 821, 奖励 220.0, 步数 30599\n",
      "2019-01-01 12:09:16,539 [INFO] [测试] 回合 38: 步骤 741, 奖励 160.0, 步数 31340\n",
      "2019-01-01 12:09:37,662 [INFO] [测试] 回合 39: 步骤 574, 奖励 160.0, 步数 31914\n",
      "2019-01-01 12:09:57,767 [INFO] [测试] 回合 40: 步骤 548, 奖励 100.0, 步数 32462\n",
      "2019-01-01 12:10:22,196 [INFO] [测试] 回合 41: 步骤 671, 奖励 160.0, 步数 33133\n",
      "2019-01-01 12:10:46,384 [INFO] [测试] 回合 42: 步骤 658, 奖励 140.0, 步数 33791\n",
      "2019-01-01 12:11:21,030 [INFO] [测试] 回合 43: 步骤 952, 奖励 240.0, 步数 34743\n",
      "2019-01-01 12:11:41,534 [INFO] [测试] 回合 44: 步骤 558, 奖励 160.0, 步数 35301\n",
      "2019-01-01 12:12:12,191 [INFO] [测试] 回合 45: 步骤 838, 奖励 220.0, 步数 36139\n",
      "2019-01-01 12:12:37,185 [INFO] [测试] 回合 46: 步骤 683, 奖励 160.0, 步数 36822\n",
      "2019-01-01 12:13:12,896 [INFO] [测试] 回合 47: 步骤 981, 奖励 280.0, 步数 37803\n",
      "2019-01-01 12:13:43,924 [INFO] [测试] 回合 48: 步骤 824, 奖励 200.0, 步数 38627\n",
      "2019-01-01 12:14:10,672 [INFO] [测试] 回合 49: 步骤 705, 奖励 160.0, 步数 39332\n",
      "2019-01-01 12:14:10,675 [INFO] [测试小结] 步数: 平均 = 786.64, 最小 = 466, 最大 = 1527.\n",
      "2019-01-01 12:14:10,688 [INFO] [测试小结] 奖励: 平均 = 196.0, 最小 = 80.0, 最大 = 440.0\n",
      "平均回合奖励 = 196.0\n"
     ]
    }
   ],
   "source": [
    "# Build a separate agent whose network is loaded from save_path (the\n",
    "# checkpoint written during training) and evaluate it.\n",
    "test_agent = DQNAgent(env, input_shape=input_shape, load_path=save_path)\n",
    "test_episode_rewards = test(env=env, agent=test_agent,\n",
    "        episodes=test_episodes)\n",
    "mean_test_reward = np.mean(test_episode_rewards)\n",
    "print('平均回合奖励 = {}'.format(mean_test_reward))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
